diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
168 files changed, 23124 insertions, 1538 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index c88760fb52ea..8e22882b66aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -86,7 +86,7 @@ amdgpu-y += \ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \ nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o \ - cyan_skillfish_reg_init.o + cyan_skillfish_reg_init.o soc_v1_0.o # add DF block amdgpu-y += \ @@ -104,7 +104,8 @@ amdgpu-y += \ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \ mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \ mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o mmhub_v3_3.o \ - gfxhub_v11_5_0.o mmhub_v4_1_0.o gfxhub_v12_0.o gmc_v12_0.o + gfxhub_v11_5_0.o mmhub_v4_1_0.o gfxhub_v12_0.o gmc_v12_0.o \ + mmhub_v4_2_0.o gfxhub_v12_1.o gmc_v12_1.o # add UMC block amdgpu-y += \ @@ -134,7 +135,9 @@ amdgpu-y += \ psp_v12_0.o \ psp_v13_0.o \ psp_v13_0_4.o \ - psp_v14_0.o + psp_v14_0.o \ + psp_v15_0.o \ + psp_v15_0_8.o # add DCE block amdgpu-y += \ @@ -156,7 +159,9 @@ amdgpu-y += \ gfx_v11_0_3.o \ imu_v11_0_3.o \ gfx_v12_0.o \ - imu_v12_0.o + gfx_v12_1.o \ + imu_v12_0.o \ + imu_v12_1.o # add async DMA block amdgpu-y += \ @@ -169,13 +174,15 @@ amdgpu-y += \ sdma_v5_0.o \ sdma_v5_2.o \ sdma_v6_0.o \ - sdma_v7_0.o + sdma_v7_0.o \ + sdma_v7_1.o # add MES block amdgpu-y += \ amdgpu_mes.o \ mes_v11_0.o \ mes_v12_0.o \ + mes_v12_1.o # add GFX userqueue support amdgpu-y += mes_userqueue.o @@ -215,7 +222,8 @@ amdgpu-y += \ jpeg_v4_0_3.o \ jpeg_v4_0_5.o \ jpeg_v5_0_0.o \ - jpeg_v5_0_1.o + jpeg_v5_0_1.o \ + jpeg_v5_3_0.o # add VPE block amdgpu-y += \ @@ -244,7 +252,9 @@ amdgpu-y += \ smuio_v13_0.o \ smuio_v13_0_3.o \ smuio_v13_0_6.o \ - smuio_v14_0_2.o + smuio_v14_0_2.o \ + smuio_v15_0_0.o \ + smuio_v15_0_8.o # add reset block amdgpu-y += \ @@ -275,7 +285,8 @@ amdgpu-y += \ amdgpu_amdkfd_gfx_v10.o \ amdgpu_amdkfd_gfx_v10_3.o \ amdgpu_amdkfd_gfx_v11.o \ - amdgpu_amdkfd_gfx_v12.o + amdgpu_amdkfd_gfx_v12.o \ + amdgpu_amdkfd_gfx_v12_1.o ifneq ($(CONFIG_DRM_AMDGPU_CIK),) amdgpu-y += amdgpu_amdkfd_gfx_v7.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9f9774f58ce1..11a36c132905 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -34,12 +34,6 @@ #define pr_fmt(fmt) "amdgpu: " fmt -#ifdef dev_fmt -#undef dev_fmt -#endif - -#define dev_fmt(fmt) "amdgpu: " fmt - #include "amdgpu_ctx.h" #include <linux/atomic.h> @@ -116,6 +110,7 @@ #include "amdgpu_reg_state.h" #include "amdgpu_userq.h" #include "amdgpu_eviction_fence.h" +#include "amdgpu_ip.h" #if defined(CONFIG_DRM_AMD_ISP) #include "amdgpu_isp.h" #endif @@ -362,59 +357,6 @@ enum amdgpu_kiq_irq { #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 -int amdgpu_device_ip_set_clockgating_state(void *dev, - enum amd_ip_block_type block_type, - enum amd_clockgating_state state); -int amdgpu_device_ip_set_powergating_state(void *dev, - enum amd_ip_block_type block_type, - enum amd_powergating_state state); -void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, - u64 *flags); -int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, - enum amd_ip_block_type block_type); -bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev, - enum amd_ip_block_type block_type); -bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev, - enum amd_ip_block_type block_type); -int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block); - -int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block); - -#define AMDGPU_MAX_IP_NUM AMD_IP_BLOCK_TYPE_NUM - -struct amdgpu_ip_block_status { - bool valid; - bool sw; - bool hw; - bool late_initialized; - bool hang; -}; - -struct amdgpu_ip_block_version { - const enum amd_ip_block_type type; - const u32 major; - const u32 minor; - const u32 rev; - const struct amd_ip_funcs *funcs; -}; - -struct amdgpu_ip_block { - struct amdgpu_ip_block_status status; - const struct amdgpu_ip_block_version *version; - struct amdgpu_device *adev; -}; - -int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, - enum amd_ip_block_type type, - u32 major, u32 minor); - -struct amdgpu_ip_block * -amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, - enum amd_ip_block_type type); - -int amdgpu_device_ip_block_add(struct amdgpu_device *adev, - const struct amdgpu_ip_block_version *ip_block_version); - /* * BIOS. */ @@ -757,71 +699,6 @@ struct amdgpu_mmio_remap { struct amdgpu_bo *bo; }; -/* Define the HW IP blocks will be used in driver , add more if necessary */ -enum amd_hw_ip_block_type { - GC_HWIP = 1, - HDP_HWIP, - SDMA0_HWIP, - SDMA1_HWIP, - SDMA2_HWIP, - SDMA3_HWIP, - SDMA4_HWIP, - SDMA5_HWIP, - SDMA6_HWIP, - SDMA7_HWIP, - LSDMA_HWIP, - MMHUB_HWIP, - ATHUB_HWIP, - NBIO_HWIP, - MP0_HWIP, - MP1_HWIP, - UVD_HWIP, - VCN_HWIP = UVD_HWIP, - JPEG_HWIP = VCN_HWIP, - VCN1_HWIP, - VCE_HWIP, - VPE_HWIP, - DF_HWIP, - DCE_HWIP, - OSSSYS_HWIP, - SMUIO_HWIP, - PWR_HWIP, - NBIF_HWIP, - THM_HWIP, - CLK_HWIP, - UMC_HWIP, - RSMU_HWIP, - XGMI_HWIP, - DCI_HWIP, - PCIE_HWIP, - ISP_HWIP, - MAX_HWIP -}; - -#define HWIP_MAX_INSTANCE 44 - -#define HW_ID_MAX 300 -#define IP_VERSION_FULL(mj, mn, rv, var, srev) \ - (((mj) << 24) | ((mn) << 16) | ((rv) << 8) | ((var) << 4) | (srev)) -#define IP_VERSION(mj, mn, rv) IP_VERSION_FULL(mj, mn, rv, 0, 0) -#define IP_VERSION_MAJ(ver) ((ver) >> 24) -#define IP_VERSION_MIN(ver) (((ver) >> 16) & 0xFF) -#define IP_VERSION_REV(ver) (((ver) >> 8) & 0xFF) -#define IP_VERSION_VARIANT(ver) (((ver) >> 4) & 0xF) -#define IP_VERSION_SUBREV(ver) ((ver) & 0xF) -#define IP_VERSION_MAJ_MIN_REV(ver) ((ver) >> 8) - -struct amdgpu_ip_map_info { - /* Map of logical to actual dev instances/mask */ - uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE]; - int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev, - enum amd_hw_ip_block_type block, - int8_t inst); - uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev, - enum amd_hw_ip_block_type block, - uint32_t mask); -}; - enum amdgpu_uid_type { AMDGPU_UID_TYPE_XCD, AMDGPU_UID_TYPE_AID, @@ -836,6 +713,38 @@ struct amdgpu_uid { struct amdgpu_device *adev; }; +#define MAX_UMA_OPTION_NAME 28 +#define MAX_UMA_OPTION_ENTRIES 19 + +#define AMDGPU_UMA_FLAG_AUTO BIT(1) +#define AMDGPU_UMA_FLAG_CUSTOM BIT(0) + +/** + * struct amdgpu_uma_carveout_option - single UMA carveout option + * @name: Name of the carveout option + * @memory_carved_mb: Amount of memory carved in MB + * @flags: ATCS flags supported by this option + */ +struct amdgpu_uma_carveout_option { + char name[MAX_UMA_OPTION_NAME]; + uint32_t memory_carved_mb; + uint8_t flags; +}; + +/** + * struct amdgpu_uma_carveout_info - table of available UMA carveout options + * @num_entries: Number of available options + * @uma_option_index: The index of the option currently applied + * @update_lock: Lock to serialize changes to the option + * @entries: The array of carveout options + */ +struct amdgpu_uma_carveout_info { + uint8_t num_entries; + uint8_t uma_option_index; + struct mutex update_lock; + struct amdgpu_uma_carveout_option entries[MAX_UMA_OPTION_ENTRIES]; +}; + struct amd_powerplay { void *pp_handle; const struct amd_pm_funcs *pp_funcs; @@ -891,6 +800,7 @@ struct amdgpu_mqd_prop { uint64_t eop_gpu_addr; uint32_t hqd_pipe_priority; uint32_t hqd_queue_priority; + uint32_t mqd_stride_size; bool allow_tunneling; bool hqd_active; uint64_t shadow_addr; @@ -1319,6 +1229,8 @@ struct amdgpu_device { struct work_struct userq_reset_work; struct amdgpu_uid *uid_info; + struct amdgpu_uma_carveout_info uma_info; + /* KFD * Must be last --ends in a flexible-array member. */ @@ -1545,8 +1457,13 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) -#define amdgpu_asic_supports_baco(adev) (adev)->asic_funcs->supports_baco((adev)) -#define amdgpu_asic_pre_asic_init(adev) (adev)->asic_funcs->pre_asic_init((adev)) +#define amdgpu_asic_supports_baco(adev) \ + ((adev)->asic_funcs->supports_baco ? (adev)->asic_funcs->supports_baco((adev)) : 0) +#define amdgpu_asic_pre_asic_init(adev) \ + { \ + if ((adev)->asic_funcs && (adev)->asic_funcs->pre_asic_init) \ + (adev)->asic_funcs->pre_asic_init((adev)); \ + } #define amdgpu_asic_update_umd_stable_pstate(adev, enter) \ ((adev)->asic_funcs->update_umd_stable_pstate ? (adev)->asic_funcs->update_umd_stable_pstate((adev), (enter)) : 0) #define amdgpu_asic_query_video_codecs(adev, e, c) (adev)->asic_funcs->query_video_codecs((adev), (e), (c)) @@ -1686,12 +1603,14 @@ int amdgpu_acpi_init(struct amdgpu_device *adev); void amdgpu_acpi_fini(struct amdgpu_device *adev); bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *adev); bool amdgpu_acpi_is_power_shift_control_supported(void); +bool amdgpu_acpi_is_set_uma_allocation_size_supported(void); int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, u8 perf_req, bool advertise); int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state); int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, enum amdgpu_ss ss_state); +int amdgpu_acpi_set_uma_allocation_size(struct amdgpu_device *adev, u8 index, u8 type); int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset, u64 *tmr_size); @@ -1720,6 +1639,7 @@ static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { re static inline void amdgpu_acpi_detect(void) { } static inline void amdgpu_acpi_release(void) { } static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } +static inline bool amdgpu_acpi_is_set_uma_allocation_size_supported(void) { return false; } static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state) { return 0; } static inline int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, @@ -1727,6 +1647,10 @@ static inline int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, { return 0; } +static inline int amdgpu_acpi_set_uma_allocation_size(struct amdgpu_device *adev, u8 index, u8 type) +{ + return -EINVAL; +} static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) { } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index d31460a9e958..c126d1bf2bc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -116,7 +116,9 @@ struct amdgpu_atcs_functions { bool pcie_perf_req; bool pcie_dev_rdy; bool pcie_bus_width; + bool get_uma_size; bool power_shift_control; + bool set_uma_allocation_size; }; struct amdgpu_atcs { @@ -241,7 +243,8 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas * (all asics). * returns 0 on success, error on failure. */ -static int amdgpu_atif_verify_interface(struct amdgpu_atif *atif) +static noinline_for_stack +int amdgpu_atif_verify_interface(struct amdgpu_atif *atif) { union acpi_object *info; struct atif_verify_interface output; @@ -286,7 +289,8 @@ out: * where n is specified in the result if a notifier is used. * Returns 0 on success, error on failure. */ -static int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif) +static noinline_for_stack +int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif) { union acpi_object *info; struct amdgpu_atif_notification_cfg *n = &atif->notification_cfg; @@ -354,7 +358,8 @@ out: * * Returns 0 on success, error on failure. */ -static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif) +static noinline_for_stack +int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif) { union acpi_object *info; struct atif_qbtc_output characteristics; @@ -587,7 +592,9 @@ static void amdgpu_atcs_parse_functions(struct amdgpu_atcs_functions *f, u32 mas f->pcie_perf_req = mask & ATCS_PCIE_PERFORMANCE_REQUEST_SUPPORTED; f->pcie_dev_rdy = mask & ATCS_PCIE_DEVICE_READY_NOTIFICATION_SUPPORTED; f->pcie_bus_width = mask & ATCS_SET_PCIE_BUS_WIDTH_SUPPORTED; + f->get_uma_size = mask & ACPI_ATCS_GET_UMA_SIZE_SUPPORTED; f->power_shift_control = mask & ATCS_SET_POWER_SHIFT_CONTROL_SUPPORTED; + f->set_uma_allocation_size = mask & ACPI_ATCS_SET_UMA_ALLOCATION_SIZE_SUPPORTED; } /** @@ -600,7 +607,8 @@ static void amdgpu_atcs_parse_functions(struct amdgpu_atcs_functions *f, u32 mas * (all asics). * returns 0 on success, error on failure. */ -static int amdgpu_atcs_verify_interface(struct amdgpu_atcs *atcs) +static noinline_for_stack +int amdgpu_atcs_verify_interface(struct amdgpu_atcs *atcs) { union acpi_object *info; struct atcs_verify_interface output; @@ -664,6 +672,11 @@ bool amdgpu_acpi_is_power_shift_control_supported(void) return amdgpu_acpi_priv.atcs.functions.power_shift_control; } +bool amdgpu_acpi_is_set_uma_allocation_size_supported(void) +{ + return amdgpu_acpi_priv.atcs.functions.set_uma_allocation_size; +} + /** * amdgpu_acpi_pcie_notify_device_ready * @@ -740,7 +753,8 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, size = *(u16 *) info->buffer.pointer; if (size < 3) { - DRM_INFO("ATCS buffer is too small: %zu\n", size); + drm_info(adev_to_drm(adev), + "ATCS buffer is too small: %zu\n", size); kfree(info); return -EINVAL; } @@ -799,7 +813,7 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_POWER_SHIFT_CONTROL, ¶ms); if (!info) { - DRM_ERROR("ATCS PSC update failed\n"); + drm_err(adev_to_drm(adev), "ATCS PSC call failed\n"); return -EIO; } @@ -905,6 +919,44 @@ static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm) #endif /** + * amdgpu_acpi_set_uma_allocation_size - Set Unified Memory Architecture allocation size via ACPI + * @adev: Pointer to the amdgpu_device structure + * @index: Index specifying the UMA allocation + * @type: Type of UMA allocation + * + * This function configures the UMA allocation size for the specified device + * using ACPI methods. The allocation is determined by the provided index and type. + * Returns 0 on success or a negative error code on failure. + */ +int amdgpu_acpi_set_uma_allocation_size(struct amdgpu_device *adev, u8 index, u8 type) +{ + struct atcs_set_uma_allocation_size_input atcs_input; + struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs; + struct acpi_buffer params; + union acpi_object *info; + + if (!amdgpu_acpi_is_set_uma_allocation_size_supported()) + return -EINVAL; + + atcs_input.size = sizeof(struct atcs_set_uma_allocation_size_input); + atcs_input.uma_size_index = index; + atcs_input.uma_size_type = type; + + params.length = sizeof(struct atcs_set_uma_allocation_size_input); + params.pointer = &atcs_input; + + info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_SET_UMA_ALLOCATION_SIZE, ¶ms); + if (!info) { + drm_err(adev_to_drm(adev), "ATCS UMA allocation size update failed\n"); + return -EIO; + } + + kfree(info); + + return 0; +} + +/** * amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu * acpi device handle * @@ -1089,7 +1141,8 @@ out: return ret; } -static int amdgpu_acpi_enumerate_xcc(void) +static noinline_for_stack +int amdgpu_acpi_enumerate_xcc(void) { struct amdgpu_acpi_dev_info *dev_info = NULL; struct amdgpu_acpi_xcc_info *xcc_info; @@ -1108,17 +1161,15 @@ static int amdgpu_acpi_enumerate_xcc(void) * one is not found, no need to check the rest. */ if (!acpi_dev) { - DRM_DEBUG_DRIVER("No matching acpi device found for %s", + DRM_DEBUG_DRIVER("No matching acpi device found for %s\n", hid); break; } xcc_info = kzalloc(sizeof(struct amdgpu_acpi_xcc_info), GFP_KERNEL); - if (!xcc_info) { - DRM_ERROR("Failed to allocate memory for xcc info\n"); + if (!xcc_info) return -ENOMEM; - } INIT_LIST_HEAD(&xcc_info->list); xcc_info->handle = acpi_device_handle(acpi_dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index a2879d2b7c8e..67a01c4f3885 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -683,7 +683,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); if (ret) { - DRM_ERROR("amdgpu: failed to schedule IB.\n"); + drm_err(adev_to_drm(adev), "failed to schedule IB.\n"); goto err_ib_sched; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 8bdfcde2029b..da4575676335 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -37,7 +37,7 @@ #include "amdgpu_sync.h" #include "amdgpu_vm.h" #include "amdgpu_xcp.h" - +#include "kfd_topology.h" extern uint64_t amdgpu_amdkfd_total_mem_size; enum TLB_FLUSH_TYPE { @@ -98,6 +98,7 @@ struct amdgpu_amdkfd_fence { spinlock_t lock; char timeline_name[TASK_COMM_LEN]; struct svm_range_bo *svm_bo; + uint16_t context_id; }; struct amdgpu_kfd_dev { @@ -148,6 +149,8 @@ struct amdkfd_process_info { /* MMU-notifier related fields */ struct mutex notifier_lock; uint32_t evicted_bos; + /* kfd context id */ + u16 context_id; struct delayed_work restore_userptr_work; struct pid *pid; bool block_mmu_notifications; @@ -188,7 +191,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm, - struct svm_range_bo *svm_bo); + struct svm_range_bo *svm_bo, + u16 context_id); int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev); #if defined(CONFIG_DEBUG_FS) @@ -407,7 +411,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev) int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger); int kgd2kfd_resume_mm(struct mm_struct *mm); int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, - struct dma_fence *fence); + u16 context_id, struct dma_fence *fence); #if IS_ENABLED(CONFIG_HSA_AMD) int kgd2kfd_init(void); void kgd2kfd_exit(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 1ef758ac5076..193ed8becab8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -62,7 +62,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0); struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm, - struct svm_range_bo *svm_bo) + struct svm_range_bo *svm_bo, + u16 context_id) { struct amdgpu_amdkfd_fence *fence; @@ -76,6 +77,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, get_task_comm(fence->timeline_name, current); spin_lock_init(&fence->lock); fence->svm_bo = svm_bo; + fence->context_id = context_id; dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock, context, atomic_inc_return(&fence_seq)); @@ -126,8 +128,12 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) if (dma_fence_is_signaled(f)) return true; + /* if fence->svm_bo is NULL, means this fence is created through + * init_kfd_vm() or amdgpu_amdkfd_gpuvm_restore_process_bos(). + * Therefore, this fence is amdgpu_amdkfd_fence->eviction_fence. + */ if (!fence->svm_bo) { - if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f)) + if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, fence->context_id, f)) return true; } else { if (!svm_range_schedule_evict_svm_bo(fence)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c new file mode 100644 index 000000000000..965c7e688535 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c @@ -0,0 +1,387 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "gc/gc_12_1_0_offset.h" +#include "gc/gc_12_1_0_sh_mask.h" +#include "soc_v1_0.h" +#include <uapi/linux/kfd_ioctl.h> + +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid, uint32_t inst) +{ + mutex_lock(&adev->srbm_mutex); + amdgpu_gfx_select_me_pipe_q(adev, mec, pipe, queue, vmid, inst); +} + +static void unlock_srbm(struct amdgpu_device *adev, uint32_t inst) +{ + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, inst); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t queue_id, uint32_t inst) +{ + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(adev, mec, pipe, queue_id, 0, inst); +} + +static void release_queue(struct amdgpu_device *adev, uint32_t inst) +{ + unlock_srbm(adev, inst); +} + +static int init_interrupts_v12_1(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t inst) +{ + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(adev, mec, pipe, 0, 0, inst); + + WREG32_SOC15(GC, GET_INST(GC, inst), regCPC_INT_CNTL, + CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); + + unlock_srbm(adev, inst); + + return 0; +} + +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t sdma_engine_reg_base = 0; + uint32_t sdma_rlc_reg_offset; + uint32_t dev_inst = GET_INST(SDMA0, engine_id); + + switch (dev_inst % adev->sdma.num_inst_per_xcc) { + case 0: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, + dev_inst / adev->sdma.num_inst_per_xcc, + regSDMA0_SDMA_QUEUE0_RB_CNTL) - regSDMA0_SDMA_QUEUE0_RB_CNTL; + break; + case 1: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, + dev_inst / adev->sdma.num_inst_per_xcc, + regSDMA1_SDMA_QUEUE0_RB_CNTL) - regSDMA0_SDMA_QUEUE0_RB_CNTL; + break; + default: + BUG(); + } + + sdma_rlc_reg_offset = sdma_engine_reg_base + + queue_id * (regSDMA0_SDMA_QUEUE1_RB_CNTL - regSDMA0_SDMA_QUEUE0_RB_CNTL); + + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, sdma_rlc_reg_offset); + + return sdma_rlc_reg_offset; +} + +static int hqd_dump_v12_1(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst) +{ + uint32_t i = 0, reg; +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(adev, pipe_id, queue_id, inst); + + for (reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR); + reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI); reg++) + DUMP_REG(reg); + + release_queue(adev, inst); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int hqd_sdma_dump_v12_1(struct amdgpu_device *adev, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); + uint32_t i = 0, reg; + + const uint32_t first_reg = regSDMA0_SDMA_QUEUE0_RB_CNTL; + const uint32_t last_reg = regSDMA0_SDMA_QUEUE0_CONTEXT_STATUS; +#undef HQD_N_REGS +#define HQD_N_REGS (last_reg - first_reg + 1) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = first_reg; + reg <= last_reg; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int wave_control_execute_v12_1(struct amdgpu_device *adev, + uint32_t gfx_index_val, + uint32_t sq_cmd, uint32_t inst) +{ + uint32_t data = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regGRBM_GFX_INDEX), gfx_index_val); + WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regSQ_CMD), sq_cmd); + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SA_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + + WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regGRBM_GFX_INDEX), data); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */ +static uint32_t kgd_gfx_v12_1_enable_debug_trap(struct amdgpu_device *adev, + bool restore_dbg_registers, + uint32_t vmid) +{ + uint32_t data = 0; + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); + + return data; +} + +/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */ +static uint32_t kgd_gfx_v12_1_disable_debug_trap(struct amdgpu_device *adev, + bool keep_trap_enabled, + uint32_t vmid) +{ + uint32_t data = 0; + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); + + return data; +} + +static int kgd_gfx_v12_1_validate_trap_override_request(struct amdgpu_device *adev, + uint32_t trap_override, + uint32_t *trap_mask_supported) +{ + *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID | + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL | + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_FP_OVERFLOW | + KFD_DBG_TRAP_MASK_FP_UNDERFLOW | + KFD_DBG_TRAP_MASK_FP_INEXACT | + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION | + KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START | + KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END; + + + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR && + trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE) + return -EPERM; + + return 0; +} + +static uint32_t trap_mask_map_sw_to_hw(uint32_t mask) +{ + uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0; + uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0; + uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID | + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL | + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_FP_OVERFLOW | + KFD_DBG_TRAP_MASK_FP_UNDERFLOW | + KFD_DBG_TRAP_MASK_FP_INEXACT | + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION); + uint32_t ret; + + ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en); + ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start); + ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end); + + return ret; +} + +static uint32_t trap_mask_map_hw_to_sw(uint32_t mask) +{ + uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN); + + if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START)) + ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START; + + if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END)) + ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END; + + return ret; +} + +/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */ +static uint32_t kgd_gfx_v12_1_set_wave_launch_trap_override(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t trap_override, + uint32_t trap_mask_bits, + uint32_t trap_mask_request, + uint32_t *trap_mask_prev, + uint32_t kfd_dbg_trap_cntl_prev) + +{ + uint32_t data = 0; + + *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev); + + data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request); + data = trap_mask_map_sw_to_hw(data); + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override); + + return data; +} + +/* returns STALL_VMID or LAUNCH_MODE. */ +static uint32_t kgd_gfx_v12_1_set_wave_launch_mode(struct amdgpu_device *adev, + uint8_t wave_launch_mode, + uint32_t vmid) +{ + uint32_t data = 0; + bool is_stall_mode = wave_launch_mode == 4; + + if (is_stall_mode) + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, STALL_VMID, + 1); + else + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, + wave_launch_mode); + + return data; +} + +#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H) +static uint32_t kgd_gfx_v12_1_set_address_watch(struct amdgpu_device *adev, + uint64_t watch_address, + uint32_t watch_address_mask, + uint32_t watch_id, + uint32_t watch_mode, + uint32_t debug_vmid, + uint32_t inst) +{ + uint32_t watch_address_high; + uint32_t watch_address_low; + uint32_t watch_address_cntl; + + watch_address_cntl = 0; + watch_address_low = lower_32_bits(watch_address); + watch_address_high = upper_32_bits(watch_address) & 0xffff; + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MODE, + watch_mode); + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MASK, + watch_address_mask >> 7); + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + VALID, + 1); + + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_H) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_high, inst); + + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_L) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_low, inst); + + return watch_address_cntl; +} + +static uint32_t kgd_gfx_v12_1_clear_address_watch(struct amdgpu_device *adev, + uint32_t watch_id) +{ + return 0; +} + +static uint32_t kgd_gfx_v12_1_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) +{ + return 0; +} + +const struct kfd2kgd_calls gfx_v12_1_kfd2kgd = { + .init_interrupts = init_interrupts_v12_1, + .hqd_dump = hqd_dump_v12_1, + .hqd_sdma_dump = hqd_sdma_dump_v12_1, + .wave_control_execute = wave_control_execute_v12_1, + .get_atc_vmid_pasid_mapping_info = NULL, + .enable_debug_trap = kgd_gfx_v12_1_enable_debug_trap, + .disable_debug_trap = kgd_gfx_v12_1_disable_debug_trap, + .validate_trap_override_request = kgd_gfx_v12_1_validate_trap_override_request, + .set_wave_launch_trap_override = kgd_gfx_v12_1_set_wave_launch_trap_override, + .set_wave_launch_mode = kgd_gfx_v12_1_set_wave_launch_mode, + .set_address_watch = kgd_gfx_v12_1_set_address_watch, + .clear_address_watch = kgd_gfx_v12_1_clear_address_watch, + .hqd_sdma_get_doorbell = kgd_gfx_v12_1_hqd_sdma_get_doorbell +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index b1c24c8fa686..88fc430b9425 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1397,8 +1397,10 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, struct dma_fence **ef) { struct amdkfd_process_info *info = NULL; + struct kfd_process *process = NULL; int ret; + process = container_of(process_info, struct kfd_process, kgd_process_info); if (!*process_info) { info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) @@ -1414,7 +1416,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, info->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), current->mm, - NULL); + NULL, process->context_id); if (!info->eviction_fence) { pr_err("Failed to create eviction fence\n"); ret = -ENOMEM; @@ -1425,6 +1427,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, INIT_DELAYED_WORK(&info->restore_userptr_work, amdgpu_amdkfd_restore_userptr_worker); + info->context_id = process->context_id; + *process_info = info; } @@ -1987,7 +1991,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( drm_gem_object_put(&mem->bo->tbo.base); /* - * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(), + * For kgd_mem allocated in import_obj_create() via + * amdgpu_amdkfd_gpuvm_import_dmabuf_fd(), * explicitly free it here. */ if (!use_release_notifier) @@ -3066,7 +3071,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * amdgpu_amdkfd_fence_create( process_info->eviction_fence->base.context, process_info->eviction_fence->mm, - NULL); + NULL, process_info->context_id); if (!new_fence) { pr_err("Failed to create eviction fence\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 636385c80f64..7f4751e5caaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -296,6 +296,83 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, return vram_type; } +static int amdgpu_atomfirmware_get_uma_carveout_info_v2_3(struct amdgpu_device *adev, + union igp_info *igp_info, + struct amdgpu_uma_carveout_info *uma_info) +{ + struct uma_carveout_option *opts; + uint8_t nr_uma_options; + int i; + + nr_uma_options = igp_info->v23.UMACarveoutIndexMax; + + if (!nr_uma_options) + return -ENODEV; + + if (nr_uma_options > MAX_UMA_OPTION_ENTRIES) { + drm_dbg(adev_to_drm(adev), + "Number of UMA options exceeds max table size. Options will not be parsed"); + return -EINVAL; + } + + uma_info->num_entries = nr_uma_options; + uma_info->uma_option_index = igp_info->v23.UMACarveoutIndex; + + opts = igp_info->v23.UMASizeControlOption; + + for (i = 0; i < nr_uma_options; i++) { + if (!opts[i].memoryCarvedGb) + uma_info->entries[i].memory_carved_mb = 512; + else + uma_info->entries[i].memory_carved_mb = (uint32_t)opts[i].memoryCarvedGb << 10; + + uma_info->entries[i].flags = opts[i].uma_carveout_option_flags.all8; + strscpy(uma_info->entries[i].name, opts[i].optionName, MAX_UMA_OPTION_NAME); + } + + return 0; +} + +int amdgpu_atomfirmware_get_uma_carveout_info(struct amdgpu_device *adev, + struct amdgpu_uma_carveout_info *uma_info) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + union igp_info *igp_info; + u16 data_offset, size; + u8 frev, crev; + int index; + + if (!(adev->flags & AMD_IS_APU)) + return -ENODEV; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + integratedsysteminfo); + + if (!amdgpu_atom_parse_data_header(mode_info->atom_context, + index, &size, + &frev, &crev, &data_offset)) { + return -EINVAL; + } + + igp_info = (union igp_info *) + (mode_info->atom_context->bios + data_offset); + + switch (frev) { + case 2: + switch (crev) { + case 3: + return amdgpu_atomfirmware_get_uma_carveout_info_v2_3(adev, igp_info, uma_info); + break; + default: + break; + } + break; + default: + break; + } + return -ENODEV; +} + int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, int *vram_width, int *vram_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 649b5530d8ae..67c8d105729b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -32,6 +32,8 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, int *vram_width, int *vram_type, int *vram_vendor); +int amdgpu_atomfirmware_get_uma_carveout_info(struct amdgpu_device *adev, + struct amdgpu_uma_carveout_info *uma_info); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 199693369c7c..1cbba9803d31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -37,9 +37,9 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, stime = ktime_get(); for (i = 0; i < n; i++) { - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence, - false, false, 0); + r = amdgpu_copy_buffer(adev, &adev->mman.default_entity, + saddr, daddr, size, NULL, &fence, + false, 0); if (r) goto exit_do_move; r = dma_fence_wait(fence, false); @@ -66,7 +66,7 @@ static void amdgpu_benchmark_log_results(struct amdgpu_device *adev, throughput = div64_s64(throughput, time_ms); - dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB from" + dev_info(adev->dev, " %s %u bo moves of %u kB from" " %d to %d in %lld ms, throughput: %lld Mb/s or %lld MB/s\n", kind, n, size >> 10, sdomain, ddomain, time_ms, throughput * 8, throughput); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 004a6a9d6b9f..09c8942c22d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -349,7 +349,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, strscpy(fw_name, "amdgpu/vega20_smc.bin"); break; default: - DRM_ERROR("SMC firmware not supported\n"); + drm_err(adev_to_drm(adev), "SMC firmware not supported\n"); return -EINVAL; } @@ -357,7 +357,8 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, AMDGPU_UCODE_REQUIRED, "%s", fw_name); if (err) { - DRM_ERROR("Failed to load firmware \"%s\"", fw_name); + drm_err(adev_to_drm(adev), + "Failed to load firmware \"%s\"\n", fw_name); amdgpu_ucode_release(&adev->pm.fw); return err; } @@ -402,7 +403,7 @@ struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev) kmalloc(sizeof(*cgs_device), GFP_KERNEL); if (!cgs_device) { - DRM_ERROR("Couldn't allocate CGS device structure\n"); + drm_err(adev_to_drm(adev), "Couldn't allocate CGS device structure\n"); return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 9f96d568acf2..d3e312bda4ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -877,8 +877,9 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force) amdgpu_connector_get_edid(connector); if (!amdgpu_connector->edid) { - DRM_ERROR("%s: probed a monitor but no|invalid EDID\n", - connector->name); + drm_err(connector->dev, + "%s: probed a monitor but no|invalid EDID\n", + connector->name); ret = connector_status_connected; } else { amdgpu_connector->use_digital = @@ -1056,7 +1057,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) amdgpu_connector_get_edid(connector); if (!amdgpu_connector->edid) { - DRM_ERROR("%s: probed a monitor but no|invalid EDID\n", + drm_err(adev_to_drm(adev), "%s: probed a monitor but no|invalid EDID\n", connector->name); ret = connector_status_connected; broken_edid = true; /* defer use_digital to later */ @@ -1667,7 +1668,8 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (router->ddc_valid || router->cd_valid) { amdgpu_connector->router_bus = amdgpu_i2c_lookup(adev, &router->i2c_info); if (!amdgpu_connector->router_bus) - DRM_ERROR("Failed to assign router i2c bus! Check dmesg for i2c errors.\n"); + drm_err(adev_to_drm(adev), + "Failed to assign router i2c bus! Check dmesg for i2c errors.\n"); } if (is_dp_bridge) { @@ -1681,7 +1683,8 @@ amdgpu_connector_add(struct amdgpu_device *adev, has_aux = true; ddc = &amdgpu_connector->ddc_bus->adapter; } else { - DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + drm_err(adev_to_drm(adev), + "DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); } } switch (connector_type) { @@ -1775,7 +1778,8 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) - DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + drm_err(adev_to_drm(adev), + "VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); else ddc = &amdgpu_connector->ddc_bus->adapter; } @@ -1800,7 +1804,8 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) - DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + drm_err(adev_to_drm(adev), + "DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); else ddc = &amdgpu_connector->ddc_bus->adapter; } @@ -1830,7 +1835,8 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) - DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + drm_err(adev_to_drm(adev), + "DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); else ddc = &amdgpu_connector->ddc_bus->adapter; } @@ -1886,7 +1892,8 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) - DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + drm_err(adev_to_drm(adev), + "HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); else ddc = &amdgpu_connector->ddc_bus->adapter; } @@ -1937,7 +1944,8 @@ amdgpu_connector_add(struct amdgpu_device *adev, has_aux = true; ddc = &amdgpu_connector->ddc_bus->adapter; } else { - DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + drm_err(adev_to_drm(adev), + "DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); } } drm_connector_init_with_ddc(dev, &amdgpu_connector->base, @@ -1985,7 +1993,8 @@ amdgpu_connector_add(struct amdgpu_device *adev, has_aux = true; ddc = &amdgpu_connector->ddc_bus->adapter; } else { - DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + drm_err(adev_to_drm(adev), + "eDP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); } } drm_connector_init_with_ddc(dev, &amdgpu_connector->base, @@ -2008,7 +2017,8 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) - DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + drm_err(adev_to_drm(adev), + "LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); else ddc = &amdgpu_connector->ddc_bus->adapter; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 425a3e564360..c72c345334d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -23,6 +23,7 @@ */ #include <linux/list.h> #include "amdgpu.h" +#include "amdgpu_ras_mgr.h" static const guid_t MCE = CPER_NOTIFY_MCE; static const guid_t CMC = CPER_NOTIFY_CMC; @@ -559,7 +560,10 @@ int amdgpu_cper_init(struct amdgpu_device *adev) { int r; - if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev)) + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_ras_cper_en(adev)) + return 0; + else if (!amdgpu_sriov_vf(adev) && !amdgpu_uniras_enabled(adev) && + !amdgpu_aca_is_enabled(adev)) return 0; r = amdgpu_cper_ring_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ecdfe6cb36cc..d591dce0f3b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1021,6 +1021,7 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, struct amdgpu_job *job) { struct amdgpu_ring *ring = amdgpu_job_ring(job); + struct amdgpu_device *adev = ring->adev; unsigned int i; int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 02138aa55793..11d70fe4f75a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -76,7 +76,8 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); if (unlikely(r)) { - DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); + drm_err(adev_to_drm(adev), + "failed to reserve CSA,PD BOs: err=%d\n", r); goto error; } } @@ -92,7 +93,8 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, AMDGPU_PTE_EXECUTABLE); if (r) { - DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); + drm_err(adev_to_drm(adev), + "failed to do bo_map on static CSA, err=%d\n", r); amdgpu_vm_bo_del(adev, *bo_va); goto error; } @@ -116,14 +118,16 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); if (unlikely(r)) { - DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); + drm_err(adev_to_drm(adev), + "failed to reserve CSA,PD BOs: err=%d\n", r); goto error; } } r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr); if (r) { - DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r); + drm_err(adev_to_drm(adev), + "failed to do bo_unmap on static CSA, err=%d\n", r); goto error; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index afedea02188d..5c344665b43c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -438,18 +438,21 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, struct drm_sched_entity *ctx_entity; if (hw_ip >= AMDGPU_HW_IP_NUM) { - DRM_ERROR("unknown HW IP type: %d\n", hw_ip); + drm_err(adev_to_drm(ctx->mgr->adev), + "unknown HW IP type: %d\n", hw_ip); return -EINVAL; } /* Right now all IPs have only one instance - multiple rings. */ if (instance != 0) { - DRM_DEBUG("invalid ip instance: %d\n", instance); + drm_dbg(adev_to_drm(ctx->mgr->adev), + "invalid ip instance: %d\n", instance); return -EINVAL; } if (ring >= amdgpu_ctx_num_entities[hw_ip]) { - DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring); + drm_dbg(adev_to_drm(ctx->mgr->adev), + "invalid ring: %d %d\n", hw_ip, ring); return -EINVAL; } @@ -874,7 +877,8 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, r = dma_fence_wait(other, true); if (r < 0 && r != -ERESTARTSYS) - DRM_ERROR("Error (%ld) waiting for fence!\n", r); + drm_err(adev_to_drm(ctx->mgr->adev), + "AMDGPU: Error waiting for fence in ctx %p\n", ctx); dma_fence_put(other); return r; @@ -929,7 +933,7 @@ static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) idr_for_each_entry(idp, ctx, id) { if (kref_read(&ctx->refcount) != 1) { - DRM_ERROR("ctx %p is still alive\n", ctx); + drm_err(adev_to_drm(mgr->adev), "ctx %p is still alive\n", ctx); continue; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 62d43b8cbe58..aeb90708f229 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1921,7 +1921,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) /* preempt the IB */ r = amdgpu_ring_preempt_ib(ring); if (r) { - DRM_WARN("failed to preempt ring %d\n", ring->idx); + drm_warn(adev_to_drm(adev), "failed to preempt ring %d\n", ring->idx); goto failure; } @@ -1929,7 +1929,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) if (atomic_read(&ring->fence_drv.last_seq) != ring->fence_drv.sync_seq) { - DRM_INFO("ring %d was preempted\n", ring->idx); + drm_info(adev_to_drm(adev), "ring %d was preempted\n", ring->idx); amdgpu_ib_preempt_mark_partial_job(ring); @@ -2016,14 +2016,16 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev, &fops_ib_preempt); if (IS_ERR(ent)) { - DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n"); + drm_err(adev_to_drm(adev), + "unable to create amdgpu_preempt_ib debugsfs file\n"); return PTR_ERR(ent); } ent = debugfs_create_file("amdgpu_force_sclk", 0200, root, adev, &fops_sclk_set); if (IS_ERR(ent)) { - DRM_ERROR("unable to create amdgpu_set_sclk debugsfs file\n"); + drm_err(adev_to_drm(adev), + "unable to create amdgpu_set_sclk debugsfs file\n"); return PTR_ERR(ent); } @@ -2036,7 +2038,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) r = amdgpu_debugfs_regs_init(adev); if (r) - DRM_ERROR("registering register debugfs failed (%d).\n", r); + drm_err(adev_to_drm(adev), "registering register debugfs failed (%d).\n", r); amdgpu_debugfs_firmware_init(adev); amdgpu_ta_if_debugfs_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index 4e2fe6674db8..ca71c2948227 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -333,11 +333,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, struct drm_sched_job *s_job; coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); - - if (!coredump) { - DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); + if (!coredump) return; - } coredump->skip_vram_check = skip_vram_check; coredump->reset_vram_lost = vram_lost; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 12201b8e99b3..347996f6ffaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -36,6 +36,7 @@ #include <linux/pci.h> #include <linux/pci-p2pdma.h> #include <linux/apple-gmux.h> +#include <linux/nospec.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_client_event.h> @@ -313,42 +314,6 @@ void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev) sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state); } -int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block) -{ - int r; - - if (ip_block->version->funcs->suspend) { - r = ip_block->version->funcs->suspend(ip_block); - if (r) { - dev_err(ip_block->adev->dev, - "suspend of IP block <%s> failed %d\n", - ip_block->version->funcs->name, r); - return r; - } - } - - ip_block->status.hw = false; - return 0; -} - -int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block) -{ - int r; - - if (ip_block->version->funcs->resume) { - r = ip_block->version->funcs->resume(ip_block); - if (r) { - dev_err(ip_block->adev->dev, - "resume of IP block <%s> failed %d\n", - ip_block->version->funcs->name, r); - return r; - } - } - - ip_block->status.hw = true; - return 0; -} - /** * DOC: board_info * @@ -417,6 +382,175 @@ static const struct attribute_group amdgpu_board_attrs_group = { .is_visible = amdgpu_board_attrs_is_visible }; +/** + * DOC: uma/carveout_options + * + * This is a read-only file that lists all available UMA allocation + * options and their corresponding indices. Example output:: + * + * $ cat uma/carveout_options + * 0: Minimum (512 MB) + * 1: (1 GB) + * 2: (2 GB) + * 3: (4 GB) + * 4: (6 GB) + * 5: (8 GB) + * 6: (12 GB) + * 7: Medium (16 GB) + * 8: (24 GB) + * 9: High (32 GB) + */ +static ssize_t carveout_options_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info; + uint32_t memory_carved; + ssize_t size = 0; + + if (!uma_info || !uma_info->num_entries) + return -ENODEV; + + for (int i = 0; i < uma_info->num_entries; i++) { + memory_carved = uma_info->entries[i].memory_carved_mb; + if (memory_carved >= SZ_1G/SZ_1M) { + size += sysfs_emit_at(buf, size, "%d: %s (%u GB)\n", + i, + uma_info->entries[i].name, + memory_carved >> 10); + } else { + size += sysfs_emit_at(buf, size, "%d: %s (%u MB)\n", + i, + uma_info->entries[i].name, + memory_carved); + } + } + + return size; +} +static DEVICE_ATTR_RO(carveout_options); + +/** + * DOC: uma/carveout + * + * This file is both readable and writable. When read, it shows the + * index of the current setting. Writing a valid index to this file + * allows users to change the UMA carveout size to the selected option + * on the next boot. + * + * The available options and their corresponding indices can be read + * from the uma/carveout_options file. + */ +static ssize_t carveout_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + return sysfs_emit(buf, "%u\n", adev->uma_info.uma_option_index); +} + +static ssize_t carveout_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info; + struct amdgpu_uma_carveout_option *opt; + unsigned long val; + uint8_t flags; + int r; + + r = kstrtoul(buf, 10, &val); + if (r) + return r; + + if (val >= uma_info->num_entries) + return -EINVAL; + + val = array_index_nospec(val, uma_info->num_entries); + opt = &uma_info->entries[val]; + + if (!(opt->flags & AMDGPU_UMA_FLAG_AUTO) && + !(opt->flags & AMDGPU_UMA_FLAG_CUSTOM)) { + drm_err_once(ddev, "Option %lu not supported due to lack of Custom/Auto flag", val); + return -EINVAL; + } + + flags = opt->flags; + flags &= ~((flags & AMDGPU_UMA_FLAG_AUTO) >> 1); + + guard(mutex)(&uma_info->update_lock); + + r = amdgpu_acpi_set_uma_allocation_size(adev, val, flags); + if (r) + return r; + + uma_info->uma_option_index = val; + + return count; +} +static DEVICE_ATTR_RW(carveout); + +static struct attribute *amdgpu_uma_attrs[] = { + &dev_attr_carveout.attr, + &dev_attr_carveout_options.attr, + NULL +}; + +const struct attribute_group amdgpu_uma_attr_group = { + .name = "uma", + .attrs = amdgpu_uma_attrs +}; + +static void amdgpu_uma_sysfs_init(struct amdgpu_device *adev) +{ + int rc; + + if (!(adev->flags & AMD_IS_APU)) + return; + + if (!amdgpu_acpi_is_set_uma_allocation_size_supported()) + return; + + rc = amdgpu_atomfirmware_get_uma_carveout_info(adev, &adev->uma_info); + if (rc) { + drm_dbg(adev_to_drm(adev), + "Failed to parse UMA carveout info from VBIOS: %d\n", rc); + goto out_info; + } + + mutex_init(&adev->uma_info.update_lock); + + rc = devm_device_add_group(adev->dev, &amdgpu_uma_attr_group); + if (rc) { + drm_dbg(adev_to_drm(adev), "Failed to add UMA carveout sysfs interfaces %d\n", rc); + goto out_attr; + } + + return; + +out_attr: + mutex_destroy(&adev->uma_info.update_lock); +out_info: + return; +} + +static void amdgpu_uma_sysfs_fini(struct amdgpu_device *adev) +{ + struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info; + + if (!amdgpu_acpi_is_set_uma_allocation_size_supported()) + return; + + mutex_destroy(&uma_info->update_lock); + uma_info->num_entries = 0; +} + static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); /** @@ -2264,293 +2398,6 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { }; /** - * amdgpu_device_ip_set_clockgating_state - set the CG state - * - * @dev: amdgpu_device pointer - * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) - * @state: clockgating state (gate or ungate) - * - * Sets the requested clockgating state for all instances of - * the hardware IP specified. - * Returns the error code from the last instance. - */ -int amdgpu_device_ip_set_clockgating_state(void *dev, - enum amd_ip_block_type block_type, - enum amd_clockgating_state state) -{ - struct amdgpu_device *adev = dev; - int i, r = 0; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - if (adev->ip_blocks[i].version->type != block_type) - continue; - if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) - continue; - r = adev->ip_blocks[i].version->funcs->set_clockgating_state( - &adev->ip_blocks[i], state); - if (r) - dev_err(adev->dev, - "set_clockgating_state of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - } - return r; -} - -/** - * amdgpu_device_ip_set_powergating_state - set the PG state - * - * @dev: amdgpu_device pointer - * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) - * @state: powergating state (gate or ungate) - * - * Sets the requested powergating state for all instances of - * the hardware IP specified. - * Returns the error code from the last instance. - */ -int amdgpu_device_ip_set_powergating_state(void *dev, - enum amd_ip_block_type block_type, - enum amd_powergating_state state) -{ - struct amdgpu_device *adev = dev; - int i, r = 0; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - if (adev->ip_blocks[i].version->type != block_type) - continue; - if (!adev->ip_blocks[i].version->funcs->set_powergating_state) - continue; - r = adev->ip_blocks[i].version->funcs->set_powergating_state( - &adev->ip_blocks[i], state); - if (r) - dev_err(adev->dev, - "set_powergating_state of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - } - return r; -} - -/** - * amdgpu_device_ip_get_clockgating_state - get the CG state - * - * @adev: amdgpu_device pointer - * @flags: clockgating feature flags - * - * Walks the list of IPs on the device and updates the clockgating - * flags for each IP. - * Updates @flags with the feature flags for each hardware IP where - * clockgating is enabled. - */ -void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, - u64 *flags) -{ - int i; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - if (adev->ip_blocks[i].version->funcs->get_clockgating_state) - adev->ip_blocks[i].version->funcs->get_clockgating_state( - &adev->ip_blocks[i], flags); - } -} - -/** - * amdgpu_device_ip_wait_for_idle - wait for idle - * - * @adev: amdgpu_device pointer - * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) - * - * Waits for the request hardware IP to be idle. - * Returns 0 for success or a negative error code on failure. - */ -int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, - enum amd_ip_block_type block_type) -{ - int i, r; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - if (adev->ip_blocks[i].version->type == block_type) { - if (adev->ip_blocks[i].version->funcs->wait_for_idle) { - r = adev->ip_blocks[i].version->funcs->wait_for_idle( - &adev->ip_blocks[i]); - if (r) - return r; - } - break; - } - } - return 0; - -} - -/** - * amdgpu_device_ip_is_hw - is the hardware IP enabled - * - * @adev: amdgpu_device pointer - * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) - * - * Check if the hardware IP is enable or not. - * Returns true if it the IP is enable, false if not. - */ -bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev, - enum amd_ip_block_type block_type) -{ - int i; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (adev->ip_blocks[i].version->type == block_type) - return adev->ip_blocks[i].status.hw; - } - return false; -} - -/** - * amdgpu_device_ip_is_valid - is the hardware IP valid - * - * @adev: amdgpu_device pointer - * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) - * - * Check if the hardware IP is valid or not. - * Returns true if it the IP is valid, false if not. - */ -bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev, - enum amd_ip_block_type block_type) -{ - int i; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (adev->ip_blocks[i].version->type == block_type) - return adev->ip_blocks[i].status.valid; - } - return false; - -} - -/** - * amdgpu_device_ip_get_ip_block - get a hw IP pointer - * - * @adev: amdgpu_device pointer - * @type: Type of hardware IP (SMU, GFX, UVD, etc.) - * - * Returns a pointer to the hardware IP block structure - * if it exists for the asic, otherwise NULL. - */ -struct amdgpu_ip_block * -amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, - enum amd_ip_block_type type) -{ - int i; - - for (i = 0; i < adev->num_ip_blocks; i++) - if (adev->ip_blocks[i].version->type == type) - return &adev->ip_blocks[i]; - - return NULL; -} - -/** - * amdgpu_device_ip_block_version_cmp - * - * @adev: amdgpu_device pointer - * @type: enum amd_ip_block_type - * @major: major version - * @minor: minor version - * - * return 0 if equal or greater - * return 1 if smaller or the ip_block doesn't exist - */ -int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, - enum amd_ip_block_type type, - u32 major, u32 minor) -{ - struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type); - - if (ip_block && ((ip_block->version->major > major) || - ((ip_block->version->major == major) && - (ip_block->version->minor >= minor)))) - return 0; - - return 1; -} - -static const char *ip_block_names[] = { - [AMD_IP_BLOCK_TYPE_COMMON] = "common", - [AMD_IP_BLOCK_TYPE_GMC] = "gmc", - [AMD_IP_BLOCK_TYPE_IH] = "ih", - [AMD_IP_BLOCK_TYPE_SMC] = "smu", - [AMD_IP_BLOCK_TYPE_PSP] = "psp", - [AMD_IP_BLOCK_TYPE_DCE] = "dce", - [AMD_IP_BLOCK_TYPE_GFX] = "gfx", - [AMD_IP_BLOCK_TYPE_SDMA] = "sdma", - [AMD_IP_BLOCK_TYPE_UVD] = "uvd", - [AMD_IP_BLOCK_TYPE_VCE] = "vce", - [AMD_IP_BLOCK_TYPE_ACP] = "acp", - [AMD_IP_BLOCK_TYPE_VCN] = "vcn", - [AMD_IP_BLOCK_TYPE_MES] = "mes", - [AMD_IP_BLOCK_TYPE_JPEG] = "jpeg", - [AMD_IP_BLOCK_TYPE_VPE] = "vpe", - [AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm", - [AMD_IP_BLOCK_TYPE_ISP] = "isp", - [AMD_IP_BLOCK_TYPE_RAS] = "ras", -}; - -static const char *ip_block_name(struct amdgpu_device *adev, enum amd_ip_block_type type) -{ - int idx = (int)type; - - return idx < ARRAY_SIZE(ip_block_names) ? ip_block_names[idx] : "unknown"; -} - -/** - * amdgpu_device_ip_block_add - * - * @adev: amdgpu_device pointer - * @ip_block_version: pointer to the IP to add - * - * Adds the IP block driver information to the collection of IPs - * on the asic. - */ -int amdgpu_device_ip_block_add(struct amdgpu_device *adev, - const struct amdgpu_ip_block_version *ip_block_version) -{ - if (!ip_block_version) - return -EINVAL; - - switch (ip_block_version->type) { - case AMD_IP_BLOCK_TYPE_VCN: - if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK) - return 0; - break; - case AMD_IP_BLOCK_TYPE_JPEG: - if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK) - return 0; - break; - default: - break; - } - - dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n", - adev->num_ip_blocks, - ip_block_name(adev, ip_block_version->type), - ip_block_version->major, - ip_block_version->minor, - ip_block_version->rev, - ip_block_version->funcs->name); - - adev->ip_blocks[adev->num_ip_blocks].adev = adev; - - adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; - - return 0; -} - -/** * amdgpu_device_enable_virtual_display - enable virtual display feature * * @adev: amdgpu_device pointer @@ -3309,19 +3156,18 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; - if (adev->mman.buffer_funcs_ring->sched.ready) + if (adev->mman.buffer_funcs_ring && + adev->mman.buffer_funcs_ring->sched.ready) amdgpu_ttm_set_buffer_funcs_status(adev, true); /* Don't init kfd if whole hive need to be reset during init */ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) { - kgd2kfd_init_zone_device(adev); amdgpu_amdkfd_device_init(adev); } amdgpu_fru_get_product_info(adev); - if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev)) - r = amdgpu_cper_init(adev); + r = amdgpu_cper_init(adev); init_failed: @@ -4491,6 +4337,7 @@ static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev) amdgpu_fru_sysfs_init(adev); amdgpu_reg_state_sysfs_init(adev); amdgpu_xcp_sysfs_init(adev); + amdgpu_uma_sysfs_init(adev); return r; } @@ -4506,6 +4353,7 @@ static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev) amdgpu_reg_state_sysfs_fini(adev); amdgpu_xcp_sysfs_fini(adev); + amdgpu_uma_sysfs_fini(adev); } /** @@ -4931,8 +4779,15 @@ fence_driver_init: flush_delayed_work(&adev->delayed_init_work); } + /* Don't init kfd if whole hive need to be reset during init */ + if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) { + kgd2kfd_init_zone_device(adev); + kfd_update_svm_support_properties(adev); + } + if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI) amdgpu_xgmi_reset_on_init(adev); + /* * Place those sysfs registering after `late_init`. As some of those * operations performed in `late_init` might affect the sysfs @@ -5030,7 +4885,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) */ void amdgpu_device_fini_hw(struct amdgpu_device *adev) { - dev_info(adev->dev, "amdgpu: finishing device.\n"); + dev_info(adev->dev, "finishing device.\n"); flush_delayed_work(&adev->delayed_init_work); if (adev->mman.initialized) @@ -5064,6 +4919,14 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_ttm_set_buffer_funcs_status(adev, false); + /* + * device went through surprise hotplug; we need to destroy topology + * before ip_fini_early to prevent kfd locking refcount issues by calling + * amdgpu_amdkfd_suspend() + */ + if (drm_dev_is_unplugged(adev_to_drm(adev))) + amdgpu_amdkfd_device_fini_sw(adev); + amdgpu_device_ip_fini_early(adev); amdgpu_irq_fini_hw(adev); @@ -5867,6 +5730,9 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) if (ret) goto mode1_reset_failed; + /* enable mmio access after mode 1 reset completed */ + adev->no_hw_access = false; + amdgpu_device_load_pci_state(adev->pdev); ret = amdgpu_psp_wait_for_bootloader(adev); if (ret) @@ -6541,7 +6407,7 @@ static int amdgpu_device_sched_resume(struct list_head *device_list, !amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) dev_info( tmp_adev->dev, - "GPU reset(%d) failed with error %d \n", + "GPU reset(%d) failed with error %d\n", atomic_read( &tmp_adev->gpu_reset_counter), tmp_adev->asic_reset_res); @@ -6683,7 +6549,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && dma_fence_is_signaled(&job->hw_fence->base)) { + if (job && (dma_fence_get_status(&job->hw_fence->base) > 0)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index fa2a22dfa048..8070a6da794f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -71,6 +71,7 @@ #include "nv.h" #include "soc21.h" #include "soc24.h" +#include "soc_v1_0.h" #include "navi10_ih.h" #include "ih_v6_0.h" #include "ih_v6_1.h" @@ -78,10 +79,12 @@ #include "gfx_v10_0.h" #include "gfx_v11_0.h" #include "gfx_v12_0.h" +#include "gfx_v12_1.h" #include "sdma_v5_0.h" #include "sdma_v5_2.h" #include "sdma_v6_0.h" #include "sdma_v7_0.h" +#include "sdma_v7_1.h" #include "lsdma_v6_0.h" #include "lsdma_v7_0.h" #include "vcn_v2_0.h" @@ -97,16 +100,21 @@ #include "amdgpu_vkms.h" #include "mes_v11_0.h" #include "mes_v12_0.h" +#include "mes_v12_1.h" #include "smuio_v11_0.h" #include "smuio_v11_0_6.h" #include "smuio_v13_0.h" #include "smuio_v13_0_3.h" #include "smuio_v13_0_6.h" #include "smuio_v14_0_2.h" +#include "smuio_v15_0_0.h" +#include "smuio_v15_0_8.h" #include "vcn_v5_0_0.h" #include "vcn_v5_0_1.h" #include "jpeg_v5_0_0.h" #include "jpeg_v5_0_1.h" +#include "jpeg_v5_3_0.h" + #include "amdgpu_ras_mgr.h" #include "amdgpu_vpe.h" @@ -209,6 +217,8 @@ static const char *hw_id_names[HW_ID_MAX] = { [XGBE_HWID] = "XGBE", [MP0_HWID] = "MP0", [VPE_HWID] = "VPE", + [ATU_HWID] = "ATU", + [AIGC_HWID] = "AIGC", }; static int hw_id_map[MAX_HWIP] = { @@ -240,6 +250,7 @@ static int hw_id_map[MAX_HWIP] = { [PCIE_HWIP] = PCIE_HWID, [VPE_HWIP] = VPE_HWID, [ISP_HWIP] = ISP_HWID, + [ATU_HWIP] = ATU_HWID, }; static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary) @@ -1980,12 +1991,16 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): amdgpu_device_ip_block_add(adev, &soc24_common_ip_block); break; + case IP_VERSION(12, 1, 0): + amdgpu_device_ip_block_add(adev, &soc_v1_0_common_ip_block); + break; default: dev_err(adev->dev, "Failed to add common ip block(GC_HWIP:0x%x)\n", @@ -2036,10 +2051,12 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): + case IP_VERSION(12, 1, 0): amdgpu_device_ip_block_add(adev, &gmc_v12_0_ip_block); break; default: @@ -2081,9 +2098,11 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &ih_v6_0_ip_block); break; case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 1): amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block); break; case IP_VERSION(7, 0, 0): + case IP_VERSION(7, 1, 0): amdgpu_device_ip_block_add(adev, &ih_v7_0_ip_block); break; default: @@ -2150,6 +2169,12 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(14, 0, 5): amdgpu_device_ip_block_add(adev, &psp_v14_0_ip_block); break; + case IP_VERSION(15, 0, 0): + amdgpu_device_ip_block_add(adev, &psp_v15_0_ip_block); + break; + case IP_VERSION(15, 0, 8): + amdgpu_device_ip_block_add(adev, &psp_v15_0_8_ip_block); + break; default: dev_err(adev->dev, "Failed to add psp ip block(MP0_HWIP:0x%x)\n", @@ -2213,6 +2238,9 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(14, 0, 5): amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block); break; + case IP_VERSION(15, 0, 0): + amdgpu_device_ip_block_add(adev, &smu_v15_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add smu ip block(MP1_HWIP:0x%x)\n", @@ -2342,12 +2370,16 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): amdgpu_device_ip_block_add(adev, &gfx_v12_0_ip_block); break; + case IP_VERSION(12, 1, 0): + amdgpu_device_ip_block_add(adev, &gfx_v12_1_ip_block); + break; default: dev_err(adev->dev, "Failed to add gfx ip block(GC_HWIP:0x%x)\n", amdgpu_ip_version(adev, GC_HWIP, 0)); @@ -2398,12 +2430,16 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 1, 1): case IP_VERSION(6, 1, 2): case IP_VERSION(6, 1, 3): + case IP_VERSION(6, 1, 4): amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block); break; case IP_VERSION(7, 0, 0): case IP_VERSION(7, 0, 1): amdgpu_device_ip_block_add(adev, &sdma_v7_0_ip_block); break; + case IP_VERSION(7, 1, 0): + amdgpu_device_ip_block_add(adev, &sdma_v7_1_ip_block); + break; default: dev_err(adev->dev, "Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n", @@ -2511,6 +2547,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block); break; + case IP_VERSION(5, 3, 0): + amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v5_3_0_ip_block); + break; case IP_VERSION(5, 0, 1): amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block); @@ -2537,6 +2577,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; @@ -2549,6 +2590,13 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) if (amdgpu_uni_mes) adev->enable_uni_mes = true; break; + case IP_VERSION(12, 1, 0): + amdgpu_device_ip_block_add(adev, &mes_v12_1_ip_block); + adev->enable_mes = true; + adev->enable_mes_kiq = true; + if (amdgpu_uni_mes) + adev->enable_uni_mes = true; + break; default: break; } @@ -2563,6 +2611,9 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev) case IP_VERSION(9, 5, 0): aqua_vanjaram_init_soc_config(adev); break; + case IP_VERSION(12, 1, 0): + soc_v1_0_init_soc_config(adev); + break; default: break; } @@ -2929,10 +2980,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): adev->family = AMDGPU_FAMILY_GC_11_5_0; break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): + case IP_VERSION(12, 1, 0): adev->family = AMDGPU_FAMILY_GC_12_0_0; break; default: @@ -2955,6 +3008,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): adev->flags |= AMD_IS_APU; break; default: @@ -3025,6 +3079,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg; break; case IP_VERSION(6, 3, 1): + case IP_VERSION(7, 11, 4): adev->nbio.funcs = &nbif_v6_3_1_funcs; adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg; break; @@ -3059,6 +3114,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 0): case IP_VERSION(6, 0, 1): case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 1): adev->hdp.funcs = &hdp_v6_0_funcs; break; case IP_VERSION(7, 0, 0): @@ -3140,6 +3196,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(14, 0, 2): adev->smuio.funcs = &smuio_v14_0_2_funcs; break; + case IP_VERSION(15, 0, 0): + adev->smuio.funcs = &smuio_v15_0_0_funcs; + break; + case IP_VERSION(15, 0, 8): + adev->smuio.funcs = &smuio_v15_0_8_funcs; + break; default: break; } @@ -3210,8 +3272,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) return r; if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - !amdgpu_sriov_vf(adev)) || - (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) { + !amdgpu_sriov_vf(adev) && + amdgpu_dpm == 1) || + (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && + amdgpu_dpm == 1)) { r = amdgpu_discovery_set_smu_ip_blocks(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b5d34797d606..48b6f6077992 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -415,15 +415,15 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) int i = 0; drm_connector_list_iter_begin(dev, &iter); - DRM_INFO("AMDGPU Display Connectors\n"); + drm_info(dev, "AMDGPU Display Connectors\n"); drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); - DRM_INFO("Connector %d:\n", i); - DRM_INFO(" %s\n", connector->name); + drm_info(dev, "Connector %d:\n", i); + drm_info(dev, " %s\n", connector->name); if (amdgpu_connector->hpd.hpd != AMDGPU_HPD_NONE) - DRM_INFO(" %s\n", hpd_names[amdgpu_connector->hpd.hpd]); + drm_info(dev, " %s\n", hpd_names[amdgpu_connector->hpd.hpd]); if (amdgpu_connector->ddc_bus) { - DRM_INFO(" DDC: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", + drm_info(dev, " DDC: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", amdgpu_connector->ddc_bus->rec.mask_clk_reg, amdgpu_connector->ddc_bus->rec.mask_data_reg, amdgpu_connector->ddc_bus->rec.a_clk_reg, @@ -433,11 +433,11 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) amdgpu_connector->ddc_bus->rec.y_clk_reg, amdgpu_connector->ddc_bus->rec.y_data_reg); if (amdgpu_connector->router.ddc_valid) - DRM_INFO(" DDC Router 0x%x/0x%x\n", + drm_info(dev, " DDC Router 0x%x/0x%x\n", amdgpu_connector->router.ddc_mux_control_pin, amdgpu_connector->router.ddc_mux_state); if (amdgpu_connector->router.cd_valid) - DRM_INFO(" Clock/Data Router 0x%x/0x%x\n", + drm_info(dev, " Clock/Data Router 0x%x/0x%x\n", amdgpu_connector->router.cd_mux_control_pin, amdgpu_connector->router.cd_mux_state); } else { @@ -447,35 +447,35 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) connector->connector_type == DRM_MODE_CONNECTOR_DVIA || connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) - DRM_INFO(" DDC: no ddc bus - possible BIOS bug - please report to xorg-driver-ati@lists.x.org\n"); + drm_info(dev, " DDC: no ddc bus - possible BIOS bug - please report to xorg-driver-ati@lists.x.org\n"); } - DRM_INFO(" Encoders:\n"); + drm_info(dev, " Encoders:\n"); list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { amdgpu_encoder = to_amdgpu_encoder(encoder); devices = amdgpu_encoder->devices & amdgpu_connector->devices; if (devices) { if (devices & ATOM_DEVICE_CRT1_SUPPORT) - DRM_INFO(" CRT1: %s\n", encoder_names[amdgpu_encoder->encoder_id]); + drm_info(dev, " CRT1: %s\n", encoder_names[amdgpu_encoder->encoder_id]); if (devices & ATOM_DEVICE_CRT2_SUPPORT) - DRM_INFO(" CRT2: %s\n", encoder_names[amdgpu_encoder->encoder_id]); + drm_info(dev, " CRT2: %s\n", encoder_names[amdgpu_encoder->encoder_id]); if (devices & ATOM_DEVICE_LCD1_SUPPORT) - DRM_INFO(" LCD1: %s\n", encoder_names[amdgpu_encoder->encoder_id]); + drm_info(dev, " LCD1: %s\n", encoder_names[amdgpu_encoder->encoder_id]); if (devices & ATOM_DEVICE_DFP1_SUPPORT) - DRM_INFO(" DFP1: %s\n", encoder_names[amdgpu_encoder->encoder_id]); + drm_info(dev, " DFP1: %s\n", encoder_names[amdgpu_encoder->encoder_id]); if (devices & ATOM_DEVICE_DFP2_SUPPORT) - DRM_INFO(" DFP2: %s\n", encoder_names[amdgpu_encoder->encoder_id]); + drm_info(dev, " DFP2: %s\n", encoder_names[amdgpu_encoder->encoder_id]); if (devices & ATOM_DEVICE_DFP3_SUPPORT) - DRM_INFO(" DFP3: %s\n", encoder_names[amdgpu_encoder->encoder_id]); + drm_info(dev, " DFP3: %s\n", encoder_names[amdgpu_encoder->encoder_id]); if (devices & ATOM_DEVICE_DFP4_SUPPORT) - DRM_INFO(" DFP4: %s\n", encoder_names[amdgpu_encoder->encoder_id]); + drm_info(dev, " DFP4: %s\n", encoder_names[amdgpu_encoder->encoder_id]); if (devices & ATOM_DEVICE_DFP5_SUPPORT) - DRM_INFO(" DFP5: %s\n", encoder_names[amdgpu_encoder->encoder_id]); + drm_info(dev, " DFP5: %s\n", encoder_names[amdgpu_encoder->encoder_id]); if (devices & ATOM_DEVICE_DFP6_SUPPORT) - DRM_INFO(" DFP6: %s\n", encoder_names[amdgpu_encoder->encoder_id]); + drm_info(dev, " DFP6: %s\n", encoder_names[amdgpu_encoder->encoder_id]); if (devices & ATOM_DEVICE_TV1_SUPPORT) - DRM_INFO(" TV1: %s\n", encoder_names[amdgpu_encoder->encoder_id]); + drm_info(dev, " TV1: %s\n", encoder_names[amdgpu_encoder->encoder_id]); if (devices & ATOM_DEVICE_CV_SUPPORT) - DRM_INFO(" CV: %s\n", encoder_names[amdgpu_encoder->encoder_id]); + drm_info(dev, " CV: %s\n", encoder_names[amdgpu_encoder->encoder_id]); } } i++; @@ -1880,7 +1880,12 @@ int amdgpu_display_get_scanout_buffer(struct drm_plane *plane, struct drm_scanout_buffer *sb) { struct amdgpu_bo *abo; - struct drm_framebuffer *fb = plane->state->fb; + struct drm_framebuffer *fb; + + if (drm_drv_uses_atomic_modeset(plane->dev)) + fb = plane->state->fb; + else + fb = plane->fb; if (!fb) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index e22cfa7c6d32..b9c38a4fe546 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -37,6 +37,7 @@ #include "amdgpu_dma_buf.h" #include "amdgpu_xgmi.h" #include "amdgpu_vm.h" +#include "amdgpu_ttm.h" #include <drm/amdgpu_drm.h> #include <drm/ttm/ttm_tt.h> #include <linux/dma-buf.h> @@ -95,18 +96,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) attach->peer2peer = false; - /* - * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+. - * Such buffers cannot be safely accessed over P2P due to device-local - * compression metadata. Fallback to system-memory path instead. - * Device supports GFX12 (GC 12.x or newer) - * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag - * - */ - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) && - bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) - attach->peer2peer = false; - if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; @@ -241,6 +230,14 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, if (r) return ERR_PTR(r); break; + + case AMDGPU_PL_MMIO_REMAP: + r = amdgpu_ttm_mmio_remap_alloc_sgt(adev, bo->tbo.resource, + attach->dev, dir, &sgt); + if (r) + return ERR_PTR(r); + break; + default: return ERR_PTR(-EINVAL); } @@ -266,6 +263,15 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir) { + struct drm_gem_object *obj = attach->dmabuf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + if (bo->tbo.resource && + bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP) { + amdgpu_ttm_mmio_remap_free_sgt(attach->dev, dir, sgt); + return; + } + if (sg_page(sgt->sgl)) { dma_unmap_sgtable(attach->dev, sgt, dir, 0); sg_free_table(sgt); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 2675689ef70f..ebd14c292978 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -348,6 +348,36 @@ enum AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 { AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF }; +enum AMDGPU_SOC_V1_0_DOORBELL_ASSIGNMENT { + /* KIQ/HIQ/DIQ */ + AMDGPU_SOC_V1_0_DOORBELL_KIQ_START = 0x000, + AMDGPU_SOC_V1_0_DOORBELL_HIQ = 0x001, + AMDGPU_SOC_V1_0_DOORBELL_DIQ = 0x002, + /* Compute: 0x03 ~ 0x20 */ + AMDGPU_SOC_V1_0_DOORBELL_MEC_RING_START = 0x003, + AMDGPU_SOC_V1_0_DOORBELL_MEC_RING_END = 0x00A, + AMDGPU_SOC_V1_0_DOORBELL_MES_RING0 = 0x00B, + AMDGPU_SOC_V1_0_DOORBELL_MES_RING1 = 0x00C, + AMDGPU_SOC_V1_0_DOORBELL_USERQUEUE_START = 0x00D, + AMDGPU_SOC_V1_0_DOORBELL_USERQUEUE_END = 0x01F, + AMDGPU_SOC_V1_0_DOORBELL_XCC_RANGE = 0x020, + + /* SDMA: 0x100 ~ 0x19F */ + AMDGPU_SOC_V1_0_DOORBELL_sDMA_ENGINE_START = 0x100, + AMDGPU_SOC_V1_0_DOORBELL_sDMA_ENGINE_END = 0x19F, + /* IH: 0x1A0 ~ 0x1AF */ + AMDGPU_SOC_V1_0_DOORBELL_IH = 0x1A0, + /* VCN: 0x1B0 ~ 0x1EF */ + AMDGPU_SOC_V1_0_DOORBELL_VCN_START = 0x1B0, + AMDGPU_SOC_V1_0_DOORBELL_VCN_END = 0x1EF, + + AMDGPU_SOC_V1_0_DOORBELL_FIRST_NON_CP = AMDGPU_SOC_V1_0_DOORBELL_sDMA_ENGINE_START, + AMDGPU_SOC_V1_0_DOORBELL_LAST_NON_CP = AMDGPU_SOC_V1_0_DOORBELL_VCN_END, + + AMDGPU_SOC_V1_0_DOORBELL_MAX_ASSIGNMENT = 0x1EF, + AMDGPU_SOC_V1_0_DOORBELL_INVALID = 0xFFFF +}; + u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c index 3040437d99c2..bc7858567321 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c @@ -129,7 +129,7 @@ uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev, { int db_bo_offset; - db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo); + db_bo_offset = amdgpu_bo_gpu_offset(db_bo); /* doorbell index is 32 bit but doorbell's size can be 32 bit * or 64 bit, so *db_size(in byte)/4 for alignment. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 848e6b7db482..bb8d9256fae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2402,7 +2402,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, supports_atomic = true; if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { - DRM_INFO("This hardware requires experimental hardware support.\n" + dev_info(&pdev->dev, "This hardware requires experimental hardware support.\n" "See modparam exp_hw_support\n"); return -ENODEV; } @@ -2449,7 +2449,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, retry_init: ret = drm_dev_register(ddev, flags); if (ret == -EAGAIN && ++retry <= 3) { - DRM_INFO("retry init %d\n", retry); + drm_info(adev_to_drm(adev), "retry init %d\n", retry); /* Don't request EX mode too frequently which is attacking */ msleep(5000); goto retry_init; @@ -3164,7 +3164,6 @@ static int __init amdgpu_init(void) if (r) goto error_fence; - DRM_INFO("amdgpu kernel modesetting enabled.\n"); amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index c7843e336310..06c333b2213b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -89,6 +89,16 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) return seq; } +static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af) +{ + af->fence_wptr_start = af->ring->wptr; +} + +static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af) +{ + af->fence_wptr_end = af->ring->wptr; +} + /** * amdgpu_fence_emit - emit a fence on the requested ring * @@ -116,8 +126,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, &ring->fence_drv.lock, adev->fence_context + ring->idx, seq); + amdgpu_fence_save_fence_wptr_start(af); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); + amdgpu_fence_save_fence_wptr_end(af); amdgpu_fence_save_wptr(af); pm_runtime_get_noresume(adev_to_drm(adev)->dev); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; @@ -709,6 +721,7 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) struct amdgpu_ring *ring = af->ring; unsigned long flags; u32 seq, last_seq; + bool reemitted = false; last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; @@ -726,7 +739,9 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { fence = container_of(unprocessed, struct amdgpu_fence, base); - if (fence == af) + if (fence->reemitted > 1) + reemitted = true; + else if (fence == af) dma_fence_set_error(&fence->base, -ETIME); else if (fence->context == af->context) dma_fence_set_error(&fence->base, -ECANCELED); @@ -734,9 +749,12 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) rcu_read_unlock(); } while (last_seq != seq); spin_unlock_irqrestore(&ring->fence_drv.lock, flags); - /* signal the guilty fence */ - amdgpu_fence_write(ring, (u32)af->base.seqno); - amdgpu_fence_process(ring); + + if (reemitted) { + /* if we've already reemitted once then just cancel everything */ + amdgpu_fence_driver_force_completion(af->ring); + af->ring->ring_backup_entries_to_copy = 0; + } } void amdgpu_fence_save_wptr(struct amdgpu_fence *af) @@ -784,10 +802,18 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, /* save everything if the ring is not guilty, otherwise * just save the content from other contexts. */ - if (!guilty_fence || (fence->context != guilty_fence->context)) + if (!fence->reemitted && + (!guilty_fence || (fence->context != guilty_fence->context))) { amdgpu_ring_backup_unprocessed_command(ring, wptr, fence->wptr); + } else if (!fence->reemitted) { + /* always save the fence */ + amdgpu_ring_backup_unprocessed_command(ring, + fence->fence_wptr_start, + fence->fence_wptr_end); + } wptr = fence->wptr; + fence->reemitted++; } rcu_read_unlock(); } while (last_seq != seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c index 328a1b963548..988ec7d7b487 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c @@ -62,17 +62,17 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f, struct FW_ATT_RECORD fw_att_record = {0}; if (size < sizeof(struct FW_ATT_RECORD)) { - DRM_WARN("FW attestation input buffer not enough memory"); + drm_warn(adev_to_drm(adev), "FW attestation input buffer not enough memory"); return -EINVAL; } if ((*pos + sizeof(struct FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) { - DRM_WARN("FW attestation out of bounds"); + drm_warn(adev_to_drm(adev), "FW attestation out of bounds"); return 0; } if (psp_get_fw_attestation_records_addr(&adev->psp, &records_addr)) { - DRM_WARN("Failed to get FW attestation record address"); + drm_warn(adev_to_drm(adev), "Failed to get FW attestation record address"); return -EINVAL; } @@ -86,11 +86,12 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f, false); if (fw_att_hdr.AttDbCookie != FW_ATTESTATION_DB_COOKIE) { - DRM_WARN("Invalid FW attestation cookie"); + drm_warn(adev_to_drm(adev), "Invalid FW attestation cookie"); return -EINVAL; } - DRM_INFO("FW attestation version = 0x%X", fw_att_hdr.AttDbVersion); + drm_info(adev_to_drm(adev), "FW attestation version = 0x%X", + fw_att_hdr.AttDbVersion); } amdgpu_device_vram_access(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index d2237ce9da70..16c3b78e50cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -476,7 +476,7 @@ int amdgpu_gart_init(struct amdgpu_device *adev) /* Compute table size */ adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE; adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE; - DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", + drm_info(adev_to_drm(adev), "GART: num cpu pages %u, num gpu pages %u\n", adev->gart.num_cpu_pages, adev->gart.num_gpu_pages); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 3e38c5db2987..032971d0a3cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -112,47 +112,6 @@ amdgpu_gem_update_timeline_node(struct drm_file *filp, return 0; } -static void -amdgpu_gem_update_bo_mapping(struct drm_file *filp, - struct amdgpu_bo_va *bo_va, - uint32_t operation, - uint64_t point, - struct dma_fence *fence, - struct drm_syncobj *syncobj, - struct dma_fence_chain *chain) -{ - struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL; - struct amdgpu_fpriv *fpriv = filp->driver_priv; - struct amdgpu_vm *vm = &fpriv->vm; - struct dma_fence *last_update; - - if (!syncobj) - return; - - /* Find the last update fence */ - switch (operation) { - case AMDGPU_VA_OP_MAP: - case AMDGPU_VA_OP_REPLACE: - if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)) - last_update = vm->last_update; - else - last_update = bo_va->last_pt_update; - break; - case AMDGPU_VA_OP_UNMAP: - case AMDGPU_VA_OP_CLEAR: - last_update = fence; - break; - default: - return; - } - - /* Add fence to timeline */ - if (!point) - drm_syncobj_replace_fence(syncobj, last_update); - else - drm_syncobj_add_point(syncobj, chain, last_update, point); -} - static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo = vmf->vma->vm_private_data; @@ -378,7 +337,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, goto out_unlock; r = amdgpu_vm_clear_freed(adev, vm, &fence); - if (unlikely(r < 0)) + if (unlikely(r < 0) && !drm_dev_is_unplugged(adev_to_drm(adev))) dev_err(adev->dev, "failed to clear page " "tables on GEM object close (%ld)\n", r); if (r || !fence) @@ -388,7 +347,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, dma_fence_put(fence); out_unlock: - if (r) + if (r && !drm_dev_is_unplugged(adev_to_drm(adev))) dev_err(adev->dev, "leaking bo va (%ld)\n", r); drm_exec_fini(&exec); } @@ -719,6 +678,15 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, if (unlikely(r != 0)) goto out; + /* Reject MMIO_REMAP BOs at IOCTL level: metadata/tiling does not apply. */ + if (robj->tbo.resource && + robj->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP) { + DRM_WARN("metadata ioctl on MMIO_REMAP BO (handle %d)\n", + args->handle); + r = -EINVAL; + goto unreserve; + } + if (args->op == AMDGPU_GEM_METADATA_OP_GET_METADATA) { amdgpu_bo_get_tiling_flags(robj, &args->data.tiling_info); r = amdgpu_bo_get_metadata(robj, args->data.data, @@ -764,16 +732,19 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint32_t operation) { - struct dma_fence *fence = dma_fence_get_stub(); + struct dma_fence *clear_fence = dma_fence_get_stub(); + struct dma_fence *last_update = NULL; int r; if (!amdgpu_vm_ready(vm)) - return fence; + return clear_fence; - r = amdgpu_vm_clear_freed(adev, vm, &fence); + /* First clear freed BOs and get a fence for that work, if any. */ + r = amdgpu_vm_clear_freed(adev, vm, &clear_fence); if (r) goto error; + /* For MAP/REPLACE we also need to update the BO mappings. */ if (operation == AMDGPU_VA_OP_MAP || operation == AMDGPU_VA_OP_REPLACE) { r = amdgpu_vm_bo_update(adev, bo_va, false); @@ -781,13 +752,59 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev, goto error; } + /* Always update PDEs after we touched the mappings. */ r = amdgpu_vm_update_pdes(adev, vm, false); + if (r) + goto error; + + /* + * Decide which fence represents the "last update" for this VM/BO: + * + * - For MAP/REPLACE we want the PT update fence, which is tracked as + * either vm->last_update (for always-valid BOs) or bo_va->last_pt_update + * (for per-BO updates). + * + * - For UNMAP/CLEAR we rely on the fence returned by + * amdgpu_vm_clear_freed(), which already covers the page table work + * for the removed mappings. + */ + switch (operation) { + case AMDGPU_VA_OP_MAP: + case AMDGPU_VA_OP_REPLACE: + if (bo_va && bo_va->base.bo) { + if (amdgpu_vm_is_bo_always_valid(vm, bo_va->base.bo)) { + if (vm->last_update) + last_update = dma_fence_get(vm->last_update); + } else { + if (bo_va->last_pt_update) + last_update = dma_fence_get(bo_va->last_pt_update); + } + } + break; + case AMDGPU_VA_OP_UNMAP: + case AMDGPU_VA_OP_CLEAR: + if (clear_fence) + last_update = dma_fence_get(clear_fence); + break; + default: + break; + } error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); - return fence; + /* + * If we managed to pick a more specific last-update fence, prefer it + * over the generic clear_fence and drop the extra reference to the + * latter. + */ + if (last_update) { + dma_fence_put(clear_fence); + return last_update; + } + + return clear_fence; } int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, @@ -813,6 +830,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, uint64_t vm_size; int r = 0; + /* Validate virtual address range against reserved regions. */ if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) { dev_dbg(dev->dev, "va_address 0x%llx is in reserved area 0x%llx\n", @@ -846,6 +864,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + /* Validate operation type. */ switch (args->operation) { case AMDGPU_VA_OP_MAP: case AMDGPU_VA_OP_UNMAP: @@ -869,6 +888,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, abo = NULL; } + /* Add input syncobj fences (if any) for synchronization. */ r = amdgpu_gem_add_input_fence(filp, args->input_fence_syncobj_handles, args->num_syncobj_handles); @@ -891,6 +911,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, goto error; } + /* Resolve the BO-VA mapping for this VM/BO combination. */ if (abo) { bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo); if (!bo_va) { @@ -903,6 +924,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, bo_va = NULL; } + /* + * Prepare the timeline syncobj node if the user requested a VM + * timeline update. This only allocates/looks up the syncobj and + * chain node; the actual fence is attached later. + */ r = amdgpu_gem_update_timeline_node(filp, args->vm_timeline_syncobj_out, args->vm_timeline_point, @@ -934,18 +960,30 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } + + /* + * Once the VA operation is done, update the VM and obtain the fence + * that represents the last relevant update for this mapping. This + * fence can then be exported to the user-visible VM timeline. + */ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) { fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, args->operation); - if (timeline_syncobj) - amdgpu_gem_update_bo_mapping(filp, bo_va, - args->operation, - args->vm_timeline_point, - fence, timeline_syncobj, - timeline_chain); - else - dma_fence_put(fence); + if (timeline_syncobj && fence) { + if (!args->vm_timeline_point) { + /* Replace the existing fence when no point is given. */ + drm_syncobj_replace_fence(timeline_syncobj, + fence); + } else { + /* Attach the last-update fence at a specific point. */ + drm_syncobj_add_point(timeline_syncobj, + timeline_chain, + fence, + args->vm_timeline_point); + } + } + dma_fence_put(fence); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 8b118c53f351..cf7a07855dae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -100,6 +100,7 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, /** * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter * + * @adev: amdgpu device pointer * @mask: array in which the per-shader array disable masks will be stored * @max_se: number of SEs * @max_sh: number of SHs @@ -107,7 +108,8 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, * The bitmask of CUs to be disabled in the shader array determined by se and * sh is stored in mask[se * max_sh + sh]. */ -void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh) +void amdgpu_gfx_parse_disable_cu(struct amdgpu_device *adev, unsigned int *mask, + unsigned int max_se, unsigned int max_sh) { unsigned int se, sh, cu; const char *p; @@ -123,16 +125,16 @@ void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsign int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu); if (ret < 3) { - DRM_ERROR("amdgpu: could not parse disable_cu\n"); + drm_err(adev_to_drm(adev), "could not parse disable_cu\n"); return; } if (se < max_se && sh < max_sh && cu < 16) { - DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu); + drm_info(adev_to_drm(adev), "Disabling CU %u.%u.%u\n", se, sh, cu); mask[se * max_sh + sh] |= 1u << cu; } else { - DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n", - se, sh, cu); + drm_err(adev_to_drm(adev), "disable_cu %u.%u.%u is out of range\n", + se, sh, cu); } next = strchr(p, ','); @@ -150,7 +152,7 @@ static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev) static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev) { if (amdgpu_compute_multipipe != -1) { - dev_info(adev->dev, "amdgpu: forcing compute pipe policy %d\n", + dev_info(adev->dev, " forcing compute pipe policy %d\n", amdgpu_compute_multipipe); return amdgpu_compute_multipipe == 1; } @@ -511,7 +513,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) j = i + xcc_id * adev->gfx.num_compute_rings; amdgpu_mes_unmap_legacy_queue(adev, &adev->gfx.compute_ring[j], - RESET_QUEUES, 0, 0); + RESET_QUEUES, 0, 0, xcc_id); } return 0; } @@ -562,7 +564,7 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) j = i + xcc_id * adev->gfx.num_gfx_rings; amdgpu_mes_unmap_legacy_queue(adev, &adev->gfx.gfx_ring[j], - PREEMPT_QUEUES, 0, 0); + PREEMPT_QUEUES, 0, 0, xcc_id); } } return 0; @@ -644,7 +646,8 @@ static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id) for (i = 0; i < adev->gfx.num_compute_rings; i++) { j = i + xcc_id * adev->gfx.num_compute_rings; r = amdgpu_mes_map_legacy_queue(adev, - &adev->gfx.compute_ring[j]); + &adev->gfx.compute_ring[j], + xcc_id); if (r) { dev_err(adev->dev, "failed to map compute queue\n"); return r; @@ -733,7 +736,8 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) for (i = 0; i < adev->gfx.num_gfx_rings; i++) { j = i + xcc_id * adev->gfx.num_gfx_rings; r = amdgpu_mes_map_legacy_queue(adev, - &adev->gfx.gfx_ring[j]); + &adev->gfx.gfx_ring[j], + xcc_id); if (r) { dev_err(adev->dev, "failed to map gfx queue\n"); return r; @@ -1067,7 +1071,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ return 0; if (adev->mes.ring[0].sched.ready) - return amdgpu_mes_rreg(adev, reg); + return amdgpu_mes_rreg(adev, reg, xcc_id); BUG_ON(!ring->funcs->emit_rreg); @@ -1143,7 +1147,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 return; if (adev->mes.ring[0].sched.ready) { - amdgpu_mes_wreg(adev, reg, v); + amdgpu_mes_wreg(adev, reg, v, xcc_id); return; } @@ -1195,6 +1199,40 @@ failed_kiq_write: dev_err(adev->dev, "failed to write reg:%x\n", reg); } +void amdgpu_gfx_get_hdp_flush_mask(struct amdgpu_ring *ring, + uint32_t *hdp_flush_mask, uint32_t *reg_mem_engine) +{ + + if (!ring || !hdp_flush_mask || !reg_mem_engine) { + DRM_INFO("%s:invalid params\n", __func__); + return; + } + + const struct nbio_hdp_flush_reg *nbio_hf_reg = ring->adev->nbio.hdp_flush_reg; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + *hdp_flush_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; + *reg_mem_engine = 1; /* pfp */ + break; + case AMDGPU_RING_TYPE_COMPUTE: + *hdp_flush_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; + *reg_mem_engine = 0; + break; + case AMDGPU_RING_TYPE_MES: + *hdp_flush_mask = nbio_hf_reg->ref_and_mask_cp8; + *reg_mem_engine = 0; + break; + case AMDGPU_RING_TYPE_KIQ: + *hdp_flush_mask = nbio_hf_reg->ref_and_mask_cp9; + *reg_mem_engine = 0; + break; + default: + DRM_ERROR("%s:unsupported ring type %d\n", __func__, ring->funcs->type); + return; + } +} + int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev) { signed long r, cnt = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index efd61a1ccc66..585cc8e81bb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -328,6 +328,8 @@ struct amdgpu_gfx_shadow_info { u32 shadow_alignment; u32 csa_size; u32 csa_alignment; + u32 eop_size; + u32 eop_alignment; }; struct amdgpu_gfx_funcs { @@ -356,6 +358,8 @@ struct amdgpu_gfx_funcs { int num_xccs_per_xcp); int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node); int (*get_xccs_per_xcp)(struct amdgpu_device *adev); + void (*get_hdp_flush_mask)(struct amdgpu_ring *ring, + uint32_t *ref_and_mask, uint32_t *reg_mem_engine); }; struct sq_work { @@ -565,8 +569,8 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width) return (u32)((1ULL << bit_width) - 1); } -void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, - unsigned max_sh); +void amdgpu_gfx_parse_disable_cu(struct amdgpu_device *adev, unsigned int *mask, + unsigned int max_se, unsigned int max_sh); int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id); @@ -615,6 +619,8 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id); void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); +void amdgpu_gfx_get_hdp_flush_mask(struct amdgpu_ring *ring, + uint32_t *ref_and_mask, uint32_t *reg_mem_engine); int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev); int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 869bceb0fe2c..6d7b8bb953ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -524,6 +524,54 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, } while (fault->timestamp < tmp); } +int amdgpu_gmc_handle_retry_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry, + u64 addr, + u32 cam_index, + u32 node_id, + bool write_fault) +{ + int ret; + + if (adev->irq.retry_cam_enabled) { + /* Delegate it to a different ring if the hardware hasn't + * already done it. + */ + if (entry->ih == &adev->irq.ih) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } + + ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, + addr, entry->timestamp, write_fault); + WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); + if (ret) + return 1; + } else { + /* Process it only if it's the first fault for this address */ + if (entry->ih != &adev->irq.ih_soft && + amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, + entry->timestamp)) + return 1; + + /* Delegate it to a different ring if the hardware hasn't + * already done it. + */ + if (entry->ih == &adev->irq.ih) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } + + /* Try to handle the recoverable page faults by filling page + * tables + */ + if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, + addr, entry->timestamp, write_fault)) + return 1; + } + return 0; +} + int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev) { int r; @@ -690,7 +738,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * itself at least for GART. */ mutex_lock(&adev->mman.gtt_window_lock); - r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, + r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.default_entity.base, AMDGPU_FENCE_OWNER_UNDEFINED, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, &job, AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB); @@ -811,9 +859,9 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; - if (adev->mes.ring[0].sched.ready) { + if (adev->mes.ring[MES_PIPE_INST(xcc_inst, 0)].sched.ready) { amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, - ref, mask); + ref, mask, xcc_inst); return; } @@ -901,6 +949,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 727342689d4b..e8e8bfa098c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -32,9 +32,11 @@ #include "amdgpu_xgmi.h" #include "amdgpu_ras.h" -/* VA hole for 48bit addresses on Vega10 */ -#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL -#define AMDGPU_GMC_HOLE_END 0xffff800000000000ULL +/* VA hole for 48bit and 57bit addresses */ +#define AMDGPU_GMC_HOLE_START (adev->vm_manager.root_level == AMDGPU_VM_PDB3 ?\ + 0x0100000000000000ULL : 0x0000800000000000ULL) +#define AMDGPU_GMC_HOLE_END (adev->vm_manager.root_level == AMDGPU_VM_PDB3 ?\ + 0xff00000000000000ULL : 0xffff800000000000ULL) /* * Hardware is programmed as if the hole doesn't exists with start and end @@ -43,7 +45,8 @@ * This mask is used to remove the upper 16bits of the VA and so come up with * the linear addr value. */ -#define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL +#define AMDGPU_GMC_HOLE_MASK (adev->vm_manager.root_level == AMDGPU_VM_PDB3 ?\ + 0x00ffffffffffffffULL : 0x0000ffffffffffffULL) /* * Ring size as power of two for the log of recent faults. @@ -353,6 +356,7 @@ struct amdgpu_gmc { u64 MC_VM_MX_L1_TLB_CNTL; u64 noretry_flags; + u64 init_pte_flags; bool flush_tlb_needs_extra_type_0; bool flush_tlb_needs_extra_type_2; @@ -394,13 +398,8 @@ static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc) * * @addr: address to extend */ -static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr) -{ - if (addr >= AMDGPU_GMC_HOLE_START) - addr |= AMDGPU_GMC_HOLE_END; - - return addr; -} +#define amdgpu_gmc_sign_extend(addr) ((addr) >= AMDGPU_GMC_HOLE_START ?\ + ((addr) | AMDGPU_GMC_HOLE_END) : (addr)) bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev); int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev); @@ -426,6 +425,12 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint16_t pasid, uint64_t timestamp); void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid); +int amdgpu_gmc_handle_retry_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry, + u64 addr, + u32 cam_index, + u32 node_id, + bool write_fault); int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 895c1e4c6747..dd9b845d5783 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -26,6 +26,8 @@ #include "amdgpu.h" +#define GART_ENTRY_WITHOUT_BO_COLOR 1 + static inline struct amdgpu_gtt_mgr * to_gtt_mgr(struct ttm_resource_manager *man) { @@ -181,6 +183,49 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man, } /** + * amdgpu_gtt_mgr_alloc_entries - alloc GART entries without GTT bo + * + * @mgr: The GTT manager object + * @mm_node: The drm mm node to return the new allocation node information + * @num_pages: The number of pages for the new allocation + * @mode: The new allocation mode + * + * Helper to dynamic alloc GART entries to map memory not accociated with + * GTT BO, for example VRAM BO physical memory, remote physical memory. + */ +int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr, + struct drm_mm_node *mm_node, + u64 num_pages, + enum drm_mm_insert_mode mode) +{ + struct amdgpu_device *adev = container_of(mgr, typeof(*adev), mman.gtt_mgr); + int r; + + spin_lock(&mgr->lock); + r = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages, + 0, GART_ENTRY_WITHOUT_BO_COLOR, 0, + adev->gmc.gart_size >> PAGE_SHIFT, + mode); + spin_unlock(&mgr->lock); + return r; +} + +/** + * amdgpu_gtt_mgr_free_entries - free GART entries not accocaited with GTT bo + * + * @mgr: The GTT manager object + * @mm_node: The drm mm node to free + */ +void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr, + struct drm_mm_node *mm_node) +{ + spin_lock(&mgr->lock); + if (drm_mm_node_allocated(mm_node)) + drm_mm_remove_node(mm_node); + spin_unlock(&mgr->lock); +} + +/** * amdgpu_gtt_mgr_recover - re-init gart * * @mgr: amdgpu_gtt_mgr pointer @@ -196,6 +241,9 @@ void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr) adev = container_of(mgr, typeof(*adev), mman.gtt_mgr); spin_lock(&mgr->lock); drm_mm_for_each_node(mm_node, &mgr->mm) { + if (mm_node->color == GART_ENTRY_WITHOUT_BO_COLOR) + continue; + node = container_of(mm_node, typeof(*node), mm_nodes[0]); amdgpu_ttm_recover_gart(node->base.bo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h index 484e936812e4..9c56be725ff3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h @@ -36,6 +36,10 @@ struct amdgpu_imu_funcs { int (*start_imu)(struct amdgpu_device *adev); void (*program_rlc_ram)(struct amdgpu_device *adev); int (*wait_for_reset_status)(struct amdgpu_device *adev); + int (*switch_compute_partition)(struct amdgpu_device *adev, + int num_xccs_per_xcp, + int compute_partition_mode); + void (*init_mcm_addr_lut)(struct amdgpu_device *adev); }; struct imu_rlc_ram_golden { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c index 99e1cf4fc955..6aa54156bbc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c @@ -94,3 +94,318 @@ void amdgpu_ip_map_init(struct amdgpu_device *adev) adev->ip_map.logical_to_dev_inst = amdgpu_logical_to_dev_inst; adev->ip_map.logical_to_dev_mask = amdgpu_logical_to_dev_mask; } + +int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block) +{ + int r; + + if (ip_block->version->funcs->suspend) { + r = ip_block->version->funcs->suspend(ip_block); + if (r) { + dev_err(ip_block->adev->dev, + "suspend of IP block <%s> failed %d\n", + ip_block->version->funcs->name, r); + return r; + } + } + + ip_block->status.hw = false; + return 0; +} + +int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block) +{ + int r; + + if (ip_block->version->funcs->resume) { + r = ip_block->version->funcs->resume(ip_block); + if (r) { + dev_err(ip_block->adev->dev, + "resume of IP block <%s> failed %d\n", + ip_block->version->funcs->name, r); + return r; + } + } + + ip_block->status.hw = true; + return 0; +} + +/** + * amdgpu_device_ip_get_ip_block - get a hw IP pointer + * + * @adev: amdgpu_device pointer + * @type: Type of hardware IP (SMU, GFX, UVD, etc.) + * + * Returns a pointer to the hardware IP block structure + * if it exists for the asic, otherwise NULL. + */ +struct amdgpu_ip_block * +amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, + enum amd_ip_block_type type) +{ + int i; + + for (i = 0; i < adev->num_ip_blocks; i++) + if (adev->ip_blocks[i].version->type == type) + return &adev->ip_blocks[i]; + + return NULL; +} + +/** + * amdgpu_device_ip_block_version_cmp + * + * @adev: amdgpu_device pointer + * @type: enum amd_ip_block_type + * @major: major version + * @minor: minor version + * + * return 0 if equal or greater + * return 1 if smaller or the ip_block doesn't exist + */ +int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, + enum amd_ip_block_type type, u32 major, + u32 minor) +{ + struct amdgpu_ip_block *ip_block = + amdgpu_device_ip_get_ip_block(adev, type); + + if (ip_block && ((ip_block->version->major > major) || + ((ip_block->version->major == major) && + (ip_block->version->minor >= minor)))) + return 0; + + return 1; +} + +static const char *const ip_block_names[] = { + [AMD_IP_BLOCK_TYPE_COMMON] = "common", + [AMD_IP_BLOCK_TYPE_GMC] = "gmc", + [AMD_IP_BLOCK_TYPE_IH] = "ih", + [AMD_IP_BLOCK_TYPE_SMC] = "smu", + [AMD_IP_BLOCK_TYPE_PSP] = "psp", + [AMD_IP_BLOCK_TYPE_DCE] = "dce", + [AMD_IP_BLOCK_TYPE_GFX] = "gfx", + [AMD_IP_BLOCK_TYPE_SDMA] = "sdma", + [AMD_IP_BLOCK_TYPE_UVD] = "uvd", + [AMD_IP_BLOCK_TYPE_VCE] = "vce", + [AMD_IP_BLOCK_TYPE_ACP] = "acp", + [AMD_IP_BLOCK_TYPE_VCN] = "vcn", + [AMD_IP_BLOCK_TYPE_MES] = "mes", + [AMD_IP_BLOCK_TYPE_JPEG] = "jpeg", + [AMD_IP_BLOCK_TYPE_VPE] = "vpe", + [AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm", + [AMD_IP_BLOCK_TYPE_ISP] = "isp", + [AMD_IP_BLOCK_TYPE_RAS] = "ras", +}; + +static const char *ip_block_name(struct amdgpu_device *adev, + enum amd_ip_block_type type) +{ + int idx = (int)type; + + return idx < ARRAY_SIZE(ip_block_names) ? ip_block_names[idx] : + "unknown"; +} + +/** + * amdgpu_device_ip_block_add + * + * @adev: amdgpu_device pointer + * @ip_block_version: pointer to the IP to add + * + * Adds the IP block driver information to the collection of IPs + * on the asic. + */ +int amdgpu_device_ip_block_add( + struct amdgpu_device *adev, + const struct amdgpu_ip_block_version *ip_block_version) +{ + if (!ip_block_version) + return -EINVAL; + + switch (ip_block_version->type) { + case AMD_IP_BLOCK_TYPE_VCN: + if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK) + return 0; + break; + case AMD_IP_BLOCK_TYPE_JPEG: + if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK) + return 0; + break; + default: + break; + } + + dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n", + adev->num_ip_blocks, + ip_block_name(adev, ip_block_version->type), + ip_block_version->major, ip_block_version->minor, + ip_block_version->rev, ip_block_version->funcs->name); + + adev->ip_blocks[adev->num_ip_blocks].adev = adev; + + adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; + + return 0; +} + +/** + * amdgpu_device_ip_set_clockgating_state - set the CG state + * + * @adev: amdgpu_device pointer + * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) + * @state: clockgating state (gate or ungate) + * + * Sets the requested clockgating state for all instances of + * the hardware IP specified. + * Returns the error code from the last instance. + */ +int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, + enum amd_ip_block_type block_type, + enum amd_clockgating_state state) +{ + int i, r = 0; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (adev->ip_blocks[i].version->type != block_type) + continue; + if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) + continue; + r = adev->ip_blocks[i].version->funcs->set_clockgating_state( + &adev->ip_blocks[i], state); + if (r) + dev_err(adev->dev, + "set_clockgating_state of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + } + return r; +} + +/** + * amdgpu_device_ip_set_powergating_state - set the PG state + * + * @adev: amdgpu_device pointer + * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) + * @state: powergating state (gate or ungate) + * + * Sets the requested powergating state for all instances of + * the hardware IP specified. + * Returns the error code from the last instance. + */ +int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, + enum amd_ip_block_type block_type, + enum amd_powergating_state state) +{ + int i, r = 0; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (adev->ip_blocks[i].version->type != block_type) + continue; + if (!adev->ip_blocks[i].version->funcs->set_powergating_state) + continue; + r = adev->ip_blocks[i].version->funcs->set_powergating_state( + &adev->ip_blocks[i], state); + if (r) + dev_err(adev->dev, + "set_powergating_state of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + } + return r; +} + +/** + * amdgpu_device_ip_get_clockgating_state - get the CG state + * + * @adev: amdgpu_device pointer + * @flags: clockgating feature flags + * + * Walks the list of IPs on the device and updates the clockgating + * flags for each IP. + * Updates @flags with the feature flags for each hardware IP where + * clockgating is enabled. + */ +void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + int i; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (adev->ip_blocks[i].version->funcs->get_clockgating_state) + adev->ip_blocks[i].version->funcs->get_clockgating_state( + &adev->ip_blocks[i], flags); + } +} + +/** + * amdgpu_device_ip_wait_for_idle - wait for idle + * + * @adev: amdgpu_device pointer + * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) + * + * Waits for the request hardware IP to be idle. + * Returns 0 for success or a negative error code on failure. + */ +int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, + enum amd_ip_block_type block_type) +{ + struct amdgpu_ip_block *ip_block; + + ip_block = amdgpu_device_ip_get_ip_block(adev, block_type); + if (!ip_block || !ip_block->status.valid) + return 0; + + if (ip_block->version->funcs->wait_for_idle) + return ip_block->version->funcs->wait_for_idle(ip_block); + + return 0; +} + +/** + * amdgpu_device_ip_is_hw - is the hardware IP enabled + * + * @adev: amdgpu_device pointer + * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) + * + * Check if the hardware IP is enable or not. + * Returns true if it the IP is enable, false if not. + */ +bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev, + enum amd_ip_block_type block_type) +{ + struct amdgpu_ip_block *ip_block; + + ip_block = amdgpu_device_ip_get_ip_block(adev, block_type); + if (ip_block) + return ip_block->status.hw; + + return false; +} + +/** + * amdgpu_device_ip_is_valid - is the hardware IP valid + * + * @adev: amdgpu_device pointer + * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) + * + * Check if the hardware IP is valid or not. + * Returns true if it the IP is valid, false if not. + */ +bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev, + enum amd_ip_block_type block_type) +{ + struct amdgpu_ip_block *ip_block; + + ip_block = amdgpu_device_ip_get_ip_block(adev, block_type); + if (ip_block) + return ip_block->status.valid; + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h index 2490fd322aec..1d0df6d93957 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h @@ -24,6 +24,131 @@ #ifndef __AMDGPU_IP_H__ #define __AMDGPU_IP_H__ +#include "amd_shared.h" + +struct amdgpu_device; + +/* Define the HW IP blocks will be used in driver , add more if necessary */ +enum amd_hw_ip_block_type { + GC_HWIP = 1, + HDP_HWIP, + SDMA0_HWIP, + SDMA1_HWIP, + SDMA2_HWIP, + SDMA3_HWIP, + SDMA4_HWIP, + SDMA5_HWIP, + SDMA6_HWIP, + SDMA7_HWIP, + LSDMA_HWIP, + MMHUB_HWIP, + ATHUB_HWIP, + NBIO_HWIP, + MP0_HWIP, + MP1_HWIP, + UVD_HWIP, + VCN_HWIP = UVD_HWIP, + JPEG_HWIP = VCN_HWIP, + VCN1_HWIP, + VCE_HWIP, + VPE_HWIP, + DF_HWIP, + DCE_HWIP, + OSSSYS_HWIP, + SMUIO_HWIP, + PWR_HWIP, + NBIF_HWIP, + THM_HWIP, + CLK_HWIP, + UMC_HWIP, + RSMU_HWIP, + XGMI_HWIP, + DCI_HWIP, + PCIE_HWIP, + ISP_HWIP, + ATU_HWIP, + AIGC_HWIP, + MAX_HWIP +}; + +#define HWIP_MAX_INSTANCE 48 + +#define HW_ID_MAX 300 +#define IP_VERSION_FULL(mj, mn, rv, var, srev) \ + (((mj) << 24) | ((mn) << 16) | ((rv) << 8) | ((var) << 4) | (srev)) +#define IP_VERSION(mj, mn, rv) IP_VERSION_FULL(mj, mn, rv, 0, 0) +#define IP_VERSION_MAJ(ver) ((ver) >> 24) +#define IP_VERSION_MIN(ver) (((ver) >> 16) & 0xFF) +#define IP_VERSION_REV(ver) (((ver) >> 8) & 0xFF) +#define IP_VERSION_VARIANT(ver) (((ver) >> 4) & 0xF) +#define IP_VERSION_SUBREV(ver) ((ver) & 0xF) +#define IP_VERSION_MAJ_MIN_REV(ver) ((ver) >> 8) + +struct amdgpu_ip_map_info { + /* Map of logical to actual dev instances/mask */ + uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE]; + int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + int8_t inst); + uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + uint32_t mask); +}; + +#define AMDGPU_MAX_IP_NUM AMD_IP_BLOCK_TYPE_NUM + +struct amdgpu_ip_block_status { + bool valid; + bool sw; + bool hw; + bool late_initialized; + bool hang; +}; + +struct amdgpu_ip_block_version { + const enum amd_ip_block_type type; + const u32 major; + const u32 minor; + const u32 rev; + const struct amd_ip_funcs *funcs; +}; + +struct amdgpu_ip_block { + struct amdgpu_ip_block_status status; + const struct amdgpu_ip_block_version *version; + struct amdgpu_device *adev; +}; + void amdgpu_ip_map_init(struct amdgpu_device *adev); +int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block); +int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block); + +struct amdgpu_ip_block * +amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, + enum amd_ip_block_type type); + +int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, + enum amd_ip_block_type type, u32 major, + u32 minor); + +int amdgpu_device_ip_block_add( + struct amdgpu_device *adev, + const struct amdgpu_ip_block_version *ip_block_version); + +int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, + enum amd_ip_block_type block_type, + enum amd_clockgating_state state); +int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, + enum amd_ip_block_type block_type, + enum amd_powergating_state state); +void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags); +int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, + enum amd_ip_block_type block_type); +bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev, + enum amd_ip_block_type block_type); +bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev, + enum amd_ip_block_type block_type); + #endif /* __AMDGPU_IP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 8112ffc85995..82bc6d657e5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -99,6 +99,41 @@ const char *soc15_ih_clientid_name[] = { "MP1" }; +const char *soc_v1_0_ih_clientid_name[] = { + "IH", + "Reserved", + "ATHUB", + "BIF", + "Reserved", + "Reserved", + "Reserved", + "RLC", + "Reserved", + "Reserved", + "GFX", + "IMU", + "Reserved", + "Reserved", + "VCN1 or UVD1", + "THM", + "VCN or UVD", + "Reserved", + "VMC", + "Reserved", + "GRBM_CP", + "GC_AID", + "ROM_SMUIO", + "DF", + "Reserved", + "PWR", + "LSDMA", + "GC_UTCL2", + "nHT", + "Reserved", + "MP0", + "MP1", +}; + const int node_id_to_phys_map[NODEID_MAX] = { [AID0_NODEID] = 0, [XCD0_NODEID] = 0, @@ -316,7 +351,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) adev->irq.irq = irq; adev_to_drm(adev)->max_vblank_count = 0x00ffffff; - dev_dbg(adev->dev, "amdgpu: irq initialized.\n"); + dev_dbg(adev->dev, "irq initialized.\n"); return 0; free_vectors: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 9f0417456abd..af72405a7226 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -26,6 +26,7 @@ #include <linux/irqdomain.h> #include "soc15_ih_clientid.h" +#include "soc_v1_0_ih_clientid.h" #include "amdgpu_ih.h" #define AMDGPU_MAX_IRQ_SRC_ID 0x100 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 37270c4dab8d..532f83d783d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -318,12 +318,36 @@ void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr) } EXPORT_SYMBOL(isp_kernel_buffer_free); +static int isp_resume(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_isp *isp = &adev->isp; + + if (isp->funcs->hw_resume) + return isp->funcs->hw_resume(isp); + + return -ENODEV; +} + +static int isp_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_isp *isp = &adev->isp; + + if (isp->funcs->hw_suspend) + return isp->funcs->hw_suspend(isp); + + return -ENODEV; +} + static const struct amd_ip_funcs isp_ip_funcs = { .name = "isp_ip", .early_init = isp_early_init, .hw_init = isp_hw_init, .hw_fini = isp_hw_fini, .is_idle = isp_is_idle, + .suspend = isp_suspend, + .resume = isp_resume, .set_clockgating_state = isp_set_clockgating_state, .set_powergating_state = isp_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h index d6f4ffa4c97c..9a5d2b1dff9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h @@ -38,6 +38,8 @@ struct amdgpu_isp; struct isp_funcs { int (*hw_init)(struct amdgpu_isp *isp); int (*hw_fini)(struct amdgpu_isp *isp); + int (*hw_suspend)(struct amdgpu_isp *isp); + int (*hw_resume)(struct amdgpu_isp *isp); }; struct amdgpu_isp { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 0a0dcbf0798d..c533c0806912 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -147,7 +147,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name); } - dma_fence_set_error(&s_job->s_fence->finished, -ETIME); + if (dma_fence_get_status(&s_job->s_fence->finished) == 0) + dma_fence_set_error(&s_job->s_fence->finished, -ETIME); if (amdgpu_device_should_recover_gpu(ring->adev)) { struct amdgpu_reset_context reset_context; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 6ee77f431d56..1878e0faa722 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -92,7 +92,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) return; if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_UNLOAD)) - DRM_WARN("smart shift update failed\n"); + drm_warn(dev, "smart shift update failed\n"); amdgpu_acpi_fini(adev); amdgpu_device_fini_hw(adev); @@ -105,7 +105,7 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev) mutex_lock(&mgpu_info.mutex); if (mgpu_info.num_gpu >= MAX_GPU_INSTANCE) { - DRM_ERROR("Cannot register more gpu instance\n"); + drm_err(adev_to_drm(adev), "Cannot register more gpu instance\n"); mutex_unlock(&mgpu_info.mutex); return; } @@ -162,7 +162,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) dev_dbg(dev->dev, "Error during ACPI methods call\n"); if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_LOAD)) - DRM_WARN("smart shift update failed\n"); + drm_warn(dev, "smart shift update failed\n"); out: if (r) @@ -201,6 +201,9 @@ static enum amd_ip_block_type type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN; break; + case AMDGPU_HW_IP_VPE: + type = AMD_IP_BLOCK_TYPE_VPE; + break; default: type = AMD_IP_BLOCK_TYPE_NUM; break; @@ -391,6 +394,42 @@ static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev, return ret; } +static int amdgpu_userq_metadata_info_compute(struct amdgpu_device *adev, + struct drm_amdgpu_info *info, + struct drm_amdgpu_info_uq_metadata_compute *meta) +{ + int ret = -EOPNOTSUPP; + + if (adev->gfx.funcs->get_gfx_shadow_info) { + struct amdgpu_gfx_shadow_info shadow = {}; + + adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true); + meta->eop_size = shadow.eop_size; + meta->eop_alignment = shadow.eop_alignment; + ret = 0; + } + + return ret; +} + +static int amdgpu_userq_metadata_info_sdma(struct amdgpu_device *adev, + struct drm_amdgpu_info *info, + struct drm_amdgpu_info_uq_metadata_sdma *meta) +{ + int ret = -EOPNOTSUPP; + + if (adev->sdma.get_csa_info) { + struct amdgpu_sdma_csa_info csa = {}; + + adev->sdma.get_csa_info(adev, &csa); + meta->csa_size = csa.size; + meta->csa_alignment = csa.alignment; + ret = 0; + } + + return ret; +} + static int amdgpu_hw_ip_info(struct amdgpu_device *adev, struct drm_amdgpu_info *info, struct drm_amdgpu_info_hw_ip *result) @@ -721,6 +760,9 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) case AMD_IP_BLOCK_TYPE_UVD: count = adev->uvd.num_uvd_inst; break; + case AMD_IP_BLOCK_TYPE_VPE: + count = adev->vpe.num_instances; + break; /* For all other IP block types not listed in the switch statement * the ip status is valid here and the instance count is one. */ @@ -1363,6 +1405,22 @@ out: ret = copy_to_user(out, &meta_info, min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0; return 0; + case AMDGPU_HW_IP_COMPUTE: + ret = amdgpu_userq_metadata_info_compute(adev, info, &meta_info.compute); + if (ret) + return ret; + + ret = copy_to_user(out, &meta_info, + min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0; + return 0; + case AMDGPU_HW_IP_DMA: + ret = amdgpu_userq_metadata_info_sdma(adev, info, &meta_info.sdma); + if (ret) + return ret; + + ret = copy_to_user(out, &meta_info, + min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0; + return 0; default: return -EINVAL; } @@ -1450,7 +1508,9 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev); if (r) - DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n"); + drm_warn(adev_to_drm(adev), + "Failed to init usermode queue manager (%d), use legacy workload submission only\n", + r); r = amdgpu_eviction_fence_init(&fpriv->evf_mgr); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 9c182ce501af..dffa0f7276b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -31,6 +31,7 @@ #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 #define AMDGPU_ONE_DOORBELL_SIZE 8 +#define AMDGPU_MES_RESERVED_QUEUES 2 int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) { @@ -91,6 +92,9 @@ static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) int amdgpu_mes_init(struct amdgpu_device *adev) { int i, r, num_pipes; + u32 total_vmid_mask, reserved_vmid_mask; + u32 queue_mask, reserved_queue_mask; + int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; adev->mes.adev = adev; @@ -101,12 +105,18 @@ int amdgpu_mes_init(struct amdgpu_device *adev) spin_lock_init(&adev->mes.queue_id_lock); mutex_init(&adev->mes.mutex_hidden); - for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) + for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) spin_lock_init(&adev->mes.ring_lock[i]); adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; + total_vmid_mask = (u32)((1UL << 16) - 1); + reserved_vmid_mask = (u32)((1UL << adev->vm_manager.first_kfd_vmid) - 1); + adev->mes.vmid_mask_mmhub = 0xFF00; - adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xFFFE : 0xFF00; + adev->mes.vmid_mask_gfxhub = total_vmid_mask & ~reserved_vmid_mask; + + queue_mask = (u32)(1UL << adev->gfx.mec.num_queue_per_pipe) - 1; + reserved_queue_mask = (u32)(1UL << AMDGPU_MES_RESERVED_QUEUES) - 1; num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me; if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES) @@ -142,7 +152,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { if (i >= num_pipes) break; - adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC; + adev->mes.compute_hqd_mask[i] = + adev->gfx.disable_kq ? 0xF : (queue_mask & ~reserved_queue_mask); } num_pipes = adev->sdma.num_instances; @@ -156,7 +167,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); if (r) { dev_err(adev->dev, @@ -192,16 +203,18 @@ int amdgpu_mes_init(struct amdgpu_device *adev) goto error_doorbell; if (adev->mes.hung_queue_db_array_size) { - r = amdgpu_bo_create_kernel(adev, - adev->mes.hung_queue_db_array_size * sizeof(u32), - PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &adev->mes.hung_queue_db_array_gpu_obj, - &adev->mes.hung_queue_db_array_gpu_addr, - &adev->mes.hung_queue_db_array_cpu_addr); - if (r) { - dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r); - goto error_doorbell; + for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { + r = amdgpu_bo_create_kernel(adev, + adev->mes.hung_queue_db_array_size * sizeof(u32), + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.hung_queue_db_array_gpu_obj[i], + &adev->mes.hung_queue_db_array_gpu_addr[i], + &adev->mes.hung_queue_db_array_cpu_addr[i]); + if (r) { + dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r); + goto error_doorbell; + } } } @@ -210,12 +223,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error_doorbell: amdgpu_mes_doorbell_free(adev); error: - for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { if (adev->mes.sch_ctx_ptr[i]) amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); if (adev->mes.query_status_fence_ptr[i]) amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs[i]); + if (adev->mes.hung_queue_db_array_gpu_obj[i]) + amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i], + &adev->mes.hung_queue_db_array_gpu_addr[i], + &adev->mes.hung_queue_db_array_cpu_addr[i]); } idr_destroy(&adev->mes.pasid_idr); @@ -229,16 +246,17 @@ error: void amdgpu_mes_fini(struct amdgpu_device *adev) { int i; - - amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj, - &adev->mes.hung_queue_db_array_gpu_addr, - &adev->mes.hung_queue_db_array_cpu_addr); + int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); - for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { + amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i], + &adev->mes.hung_queue_db_array_gpu_addr[i], + &adev->mes.hung_queue_db_array_cpu_addr[i]); + if (adev->mes.sch_ctx_ptr[i]) amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); if (adev->mes.query_status_fence_ptr[i]) @@ -304,13 +322,14 @@ int amdgpu_mes_resume(struct amdgpu_device *adev) } int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, - struct amdgpu_ring *ring) + struct amdgpu_ring *ring, uint32_t xcc_id) { struct mes_map_legacy_queue_input queue_input; int r; memset(&queue_input, 0, sizeof(queue_input)); + queue_input.xcc_id = xcc_id; queue_input.queue_type = ring->funcs->type; queue_input.doorbell_offset = ring->doorbell_index; queue_input.pipe_id = ring->pipe; @@ -330,11 +349,12 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, - u64 gpu_addr, u64 seq) + u64 gpu_addr, u64 seq, uint32_t xcc_id) { struct mes_unmap_legacy_queue_input queue_input; int r; + queue_input.xcc_id = xcc_id; queue_input.action = action; queue_input.queue_type = ring->funcs->type; queue_input.doorbell_offset = ring->doorbell_index; @@ -355,13 +375,15 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid, - bool use_mmio) + bool use_mmio, + uint32_t xcc_id) { struct mes_reset_queue_input queue_input; int r; memset(&queue_input, 0, sizeof(queue_input)); + queue_input.xcc_id = xcc_id; queue_input.queue_type = ring->funcs->type; queue_input.doorbell_offset = ring->doorbell_index; queue_input.me_id = ring->me; @@ -393,11 +415,11 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, int queue_type, bool detect_only, unsigned int *hung_db_num, - u32 *hung_db_array) - + u32 *hung_db_array, + uint32_t xcc_id) { struct mes_detect_and_reset_queue_input input; - u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr; + u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id]; int r, i; if (!hung_db_num || !hung_db_array) @@ -409,7 +431,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, return -EINVAL; /* Clear the doorbell array before detection */ - memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET, + memset(adev->mes.hung_queue_db_array_cpu_addr[xcc_id], AMDGPU_MES_INVALID_DB_OFFSET, adev->mes.hung_queue_db_array_size * sizeof(u32)); input.queue_type = queue_type; input.detect_only = detect_only; @@ -436,7 +458,8 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, return r; } -uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) +uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t xcc_id) { struct mes_misc_op_input op_input; int r, val = 0; @@ -450,6 +473,7 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) } read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4); read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset]; + op_input.xcc_id = xcc_id; op_input.op = MES_MISC_OP_READ_REG; op_input.read_reg.reg_offset = reg; op_input.read_reg.buffer_addr = read_val_gpu_addr; @@ -473,12 +497,13 @@ error: return val; } -int amdgpu_mes_wreg(struct amdgpu_device *adev, - uint32_t reg, uint32_t val) +int amdgpu_mes_wreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t val, uint32_t xcc_id) { struct mes_misc_op_input op_input; int r; + op_input.xcc_id = xcc_id; op_input.op = MES_MISC_OP_WRITE_REG; op_input.write_reg.reg_offset = reg; op_input.write_reg.reg_value = val; @@ -501,11 +526,13 @@ error: int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask) + uint32_t ref, uint32_t mask, + uint32_t xcc_id) { struct mes_misc_op_input op_input; int r; + op_input.xcc_id = xcc_id; op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT; op_input.wrm_reg.reg0 = reg0; op_input.wrm_reg.reg1 = reg1; @@ -530,14 +557,23 @@ error: int amdgpu_mes_hdp_flush(struct amdgpu_device *adev) { - uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask; + uint32_t hdp_flush_req_offset, hdp_flush_done_offset; + struct amdgpu_ring *mes_ring; + uint32_t ref_and_mask = 0, reg_mem_engine = 0; + if (!adev->gfx.funcs->get_hdp_flush_mask) { + dev_err(adev->dev, "mes hdp flush is not supported.\n"); + return -EINVAL; + } + + mes_ring = &adev->mes.ring[0]; hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev); hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev); - ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0; + + adev->gfx.funcs->get_hdp_flush_mask(mes_ring, &ref_and_mask, ®_mem_engine); return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset, - ref_and_mask, ref_and_mask); + ref_and_mask, ref_and_mask, 0); } int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, @@ -545,7 +581,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, uint32_t spi_gdbg_per_vmid_cntl, const uint32_t *tcp_watch_cntl, uint32_t flags, - bool trap_en) + bool trap_en, + uint32_t xcc_id) { struct mes_misc_op_input op_input = {0}; int r; @@ -556,6 +593,7 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, return -EINVAL; } + op_input.xcc_id = xcc_id; op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; op_input.set_shader_debugger.process_context_addr = process_context_addr; op_input.set_shader_debugger.flags.u32all = flags; @@ -584,7 +622,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, } int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, - uint64_t process_context_addr) + uint64_t process_context_addr, + uint32_t xcc_id) { struct mes_misc_op_input op_input = {0}; int r; @@ -595,6 +634,7 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, return -EINVAL; } + op_input.xcc_id = xcc_id; op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; op_input.set_shader_debugger.process_context_addr = process_context_addr; op_input.set_shader_debugger.flags.process_ctx_flush = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index e989225b354b..88685c58798e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -58,11 +58,20 @@ enum amdgpu_mes_priority_level { struct amdgpu_mes_funcs; enum amdgpu_mes_pipe { - AMDGPU_MES_SCHED_PIPE = 0, - AMDGPU_MES_KIQ_PIPE, + AMDGPU_MES_PIPE_0 = 0, + AMDGPU_MES_PIPE_1, AMDGPU_MAX_MES_PIPES = 2, }; +#define AMDGPU_MES_SCHED_PIPE AMDGPU_MES_PIPE_0 +#define AMDGPU_MES_KIQ_PIPE AMDGPU_MES_PIPE_1 + +#define AMDGPU_MAX_MES_INST_PIPES \ + (AMDGPU_MAX_MES_PIPES * AMDGPU_MAX_GC_INSTANCES) + +#define MES_PIPE_INST(xcc_id, pipe_id) \ + (xcc_id * AMDGPU_MAX_MES_PIPES + pipe_id) + struct amdgpu_mes { struct amdgpu_device *adev; @@ -86,29 +95,29 @@ struct amdgpu_mes { uint64_t default_process_quantum; uint64_t default_gang_quantum; - struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES]; - spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES]; + struct amdgpu_ring ring[AMDGPU_MAX_MES_INST_PIPES]; + spinlock_t ring_lock[AMDGPU_MAX_MES_INST_PIPES]; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; /* mes ucode */ - struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_PIPES]; - uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES]; - uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES]; + struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_INST_PIPES]; + uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_INST_PIPES]; + uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_INST_PIPES]; uint64_t uc_start_addr[AMDGPU_MAX_MES_PIPES]; /* mes ucode data */ - struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_PIPES]; - uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES]; - uint32_t *data_fw_ptr[AMDGPU_MAX_MES_PIPES]; + struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_INST_PIPES]; + uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_INST_PIPES]; + uint32_t *data_fw_ptr[AMDGPU_MAX_MES_INST_PIPES]; uint64_t data_start_addr[AMDGPU_MAX_MES_PIPES]; /* eop gpu obj */ - struct amdgpu_bo *eop_gpu_obj[AMDGPU_MAX_MES_PIPES]; - uint64_t eop_gpu_addr[AMDGPU_MAX_MES_PIPES]; + struct amdgpu_bo *eop_gpu_obj[AMDGPU_MAX_MES_INST_PIPES]; + uint64_t eop_gpu_addr[AMDGPU_MAX_MES_INST_PIPES]; - void *mqd_backup[AMDGPU_MAX_MES_PIPES]; - struct amdgpu_irq_src irq[AMDGPU_MAX_MES_PIPES]; + void *mqd_backup[AMDGPU_MAX_MES_INST_PIPES]; + struct amdgpu_irq_src irq[AMDGPU_MAX_MES_INST_PIPES]; uint32_t vmid_mask_gfxhub; uint32_t vmid_mask_mmhub; @@ -116,18 +125,21 @@ struct amdgpu_mes { uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; - uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; - uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES]; - uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES]; - uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES]; - uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES]; - uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES]; + + uint32_t sch_ctx_offs[AMDGPU_MAX_MES_INST_PIPES]; + uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_INST_PIPES]; + uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_INST_PIPES]; + uint32_t query_status_fence_offs[AMDGPU_MAX_MES_INST_PIPES]; + uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_INST_PIPES]; + uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_INST_PIPES]; uint32_t saved_flags; /* initialize kiq pipe */ - int (*kiq_hw_init)(struct amdgpu_device *adev); - int (*kiq_hw_fini)(struct amdgpu_device *adev); + int (*kiq_hw_init)(struct amdgpu_device *adev, + uint32_t xcc_id); + int (*kiq_hw_fini)(struct amdgpu_device *adev, + uint32_t xcc_id); /* MES doorbells */ uint32_t db_start_dw_offset; @@ -150,9 +162,15 @@ struct amdgpu_mes { int hung_queue_db_array_size; int hung_queue_hqd_info_offset; - struct amdgpu_bo *hung_queue_db_array_gpu_obj; - uint64_t hung_queue_db_array_gpu_addr; - void *hung_queue_db_array_cpu_addr; + struct amdgpu_bo *hung_queue_db_array_gpu_obj[AMDGPU_MAX_MES_PIPES]; + uint64_t hung_queue_db_array_gpu_addr[AMDGPU_MAX_MES_PIPES]; + void *hung_queue_db_array_cpu_addr[AMDGPU_MAX_MES_PIPES]; + + /* cooperative dispatch */ + bool enable_coop_mode; + int master_xcc_ids[AMDGPU_MAX_MES_INST_PIPES]; + struct amdgpu_bo *shared_cmd_buf_obj[AMDGPU_MAX_MES_INST_PIPES]; + uint64_t shared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES]; }; struct amdgpu_mes_gang { @@ -208,6 +226,7 @@ struct amdgpu_mes_gang_properties { }; struct mes_add_queue_input { + uint32_t xcc_id; uint32_t process_id; uint64_t page_table_base_addr; uint64_t process_va_start; @@ -234,15 +253,19 @@ struct mes_add_queue_input { uint32_t is_aql_queue; uint32_t queue_size; uint32_t exclusively_scheduled; + uint32_t sh_mem_config_data; + uint32_t vm_cntx_cntl; }; struct mes_remove_queue_input { + uint32_t xcc_id; uint32_t doorbell_offset; uint64_t gang_context_addr; bool remove_queue_after_reset; }; struct mes_map_legacy_queue_input { + uint32_t xcc_id; uint32_t queue_type; uint32_t doorbell_offset; uint32_t pipe_id; @@ -252,6 +275,7 @@ struct mes_map_legacy_queue_input { }; struct mes_unmap_legacy_queue_input { + uint32_t xcc_id; enum amdgpu_unmap_queues_action action; uint32_t queue_type; uint32_t doorbell_offset; @@ -262,6 +286,7 @@ struct mes_unmap_legacy_queue_input { }; struct mes_suspend_gang_input { + uint32_t xcc_id; bool suspend_all_gangs; uint64_t gang_context_addr; uint64_t suspend_fence_addr; @@ -269,11 +294,13 @@ struct mes_suspend_gang_input { }; struct mes_resume_gang_input { + uint32_t xcc_id; bool resume_all_gangs; uint64_t gang_context_addr; }; struct mes_reset_queue_input { + uint32_t xcc_id; uint32_t queue_type; uint32_t doorbell_offset; bool use_mmio; @@ -309,7 +336,8 @@ enum mes_misc_opcode { }; struct mes_misc_op_input { - enum mes_misc_opcode op; + uint32_t xcc_id; + enum mes_misc_opcode op; union { struct { @@ -395,8 +423,10 @@ struct amdgpu_mes_funcs { struct mes_inv_tlbs_pasid_input *input); }; -#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) -#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev)) +#define amdgpu_mes_kiq_hw_init(adev, xcc_id) \ + (adev)->mes.kiq_hw_init((adev), (xcc_id)) +#define amdgpu_mes_kiq_hw_fini(adev, xcc_id) \ + (adev)->mes.kiq_hw_fini((adev), (xcc_id)) int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe); int amdgpu_mes_init(struct amdgpu_device *adev); @@ -406,38 +436,42 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev); int amdgpu_mes_resume(struct amdgpu_device *adev); int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, - struct amdgpu_ring *ring); + struct amdgpu_ring *ring, uint32_t xcc_id); int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, - u64 gpu_addr, u64 seq); + u64 gpu_addr, u64 seq, uint32_t xcc_id); int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid, - bool use_mmio); + bool use_mmio, + uint32_t xcc_id); int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev); int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, int queue_type, bool detect_only, unsigned int *hung_db_num, - u32 *hung_db_array); + u32 *hung_db_array, + uint32_t xcc_id); -uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); +uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t xcc_id); int amdgpu_mes_wreg(struct amdgpu_device *adev, - uint32_t reg, uint32_t val); + uint32_t reg, uint32_t val, uint32_t xcc_id); int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask); + uint32_t ref, uint32_t mask, uint32_t xcc_id); int amdgpu_mes_hdp_flush(struct amdgpu_device *adev); int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, uint64_t process_context_addr, uint32_t spi_gdbg_per_vmid_cntl, const uint32_t *tcp_watch_cntl, uint32_t flags, - bool trap_en); + bool trap_en, + uint32_t xcc_id); int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, - uint64_t process_context_addr); + uint64_t process_context_addr, uint32_t xcc_id); uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, enum amdgpu_mes_priority_level prio); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e08f58de4b17..b676310ce9ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1050,7 +1050,8 @@ static const char * const amdgpu_vram_names[] = { "DDR5", "LPDDR4", "LPDDR5", - "HBM3E" + "HBM3E", + "HBM4" }; /** @@ -1080,10 +1081,10 @@ int amdgpu_bo_init(struct amdgpu_device *adev) adev->gmc.aper_size); } - DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n", + drm_info(adev_to_drm(adev), "Detected VRAM RAM=%lluM, BAR=%lluM\n", adev->gmc.mc_vram_size >> 20, (unsigned long long)adev->gmc.aper_size >> 20); - DRM_INFO("RAM width %dbits %s\n", + drm_info(adev_to_drm(adev), "RAM width %dbits %s\n", adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]); return amdgpu_ttm_init(adev); } @@ -1125,6 +1126,10 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_bo_user *ubo; + /* MMIO_REMAP is BAR I/O space; tiling should never be used here. */ + WARN_ON_ONCE(bo->tbo.resource && + bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP); + BUG_ON(bo->tbo.type == ttm_bo_type_kernel); if (adev->family <= AMDGPU_FAMILY_CZ && AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6) @@ -1147,6 +1152,13 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags) { struct amdgpu_bo_user *ubo; + /* + * MMIO_REMAP BOs are not real VRAM/GTT memory but a fixed BAR I/O window. + * They should never go through GEM tiling helpers. + */ + WARN_ON_ONCE(bo->tbo.resource && + bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP); + BUG_ON(bo->tbo.type == ttm_bo_type_kernel); dma_resv_assert_held(bo->tbo.base.resv); ubo = to_amdgpu_bo_user(bo); @@ -1321,8 +1333,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (r) goto out; - r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true, - AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE); + r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv, + &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE); if (WARN_ON(r)) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 0b10497d487c..b0540b009e84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -39,6 +39,8 @@ #include "psp_v13_0.h" #include "psp_v13_0_4.h" #include "psp_v14_0.h" +#include "psp_v15_0.h" +#include "psp_v15_0_8.h" #include "amdgpu_ras.h" #include "amdgpu_securedisplay.h" @@ -259,6 +261,13 @@ static int psp_early_init(struct amdgpu_ip_block *ip_block) psp_v14_0_set_psp_funcs(psp); psp->boot_time_tmr = false; break; + case IP_VERSION(15, 0, 0): + psp_v15_0_0_set_psp_funcs(psp); + psp->boot_time_tmr = false; + break; + case IP_VERSION(15, 0, 8): + psp_v15_0_8_set_psp_funcs(psp); + break; default: return -EINVAL; } @@ -893,18 +902,12 @@ static int psp_tmr_init(struct psp_context *psp) static bool psp_skip_tmr(struct psp_context *psp) { - switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) { - case IP_VERSION(11, 0, 9): - case IP_VERSION(11, 0, 7): - case IP_VERSION(13, 0, 2): - case IP_VERSION(13, 0, 6): - case IP_VERSION(13, 0, 10): - case IP_VERSION(13, 0, 12): - case IP_VERSION(13, 0, 14): - return true; - default: - return false; - } + u32 ip_version = amdgpu_ip_version(psp->adev, MP0_HWIP, 0); + + if (amdgpu_sriov_vf(psp->adev)) + return (ip_version >= IP_VERSION(11, 0, 7)) ? true : false; + else + return (!psp->boot_time_tmr || !psp->autoload_supported) ? false : true; } static int psp_tmr_load(struct psp_context *psp) @@ -912,10 +915,7 @@ static int psp_tmr_load(struct psp_context *psp) int ret; struct psp_gfx_cmd_resp *cmd; - /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR. - * Already set up by host driver. - */ - if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp)) + if (psp_skip_tmr(psp)) return 0; cmd = acquire_psp_cmd_buf(psp); @@ -947,10 +947,7 @@ static int psp_tmr_unload(struct psp_context *psp) int ret; struct psp_gfx_cmd_resp *cmd; - /* skip TMR unload for Navi12 and CHIP_SIENNA_CICHLID SRIOV, - * as TMR is not loaded at all - */ - if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp)) + if (psp_skip_tmr(psp)) return 0; cmd = acquire_psp_cmd_buf(psp); @@ -1995,6 +1992,7 @@ int psp_ras_initialize(struct psp_context *psp) ras_cmd->ras_in_message.init_flags.nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask; + ras_cmd->ras_in_message.init_flags.vram_type = (uint8_t)adev->gmc.vram_type; ret = psp_ta_load(psp, &psp->ras_context.context); @@ -2620,18 +2618,16 @@ skip_pin_bo: return ret; } - if (!psp->boot_time_tmr || !psp->autoload_supported) { - ret = psp_tmr_load(psp); - if (ret) { - dev_err(adev->dev, "PSP load tmr failed!\n"); - return ret; - } + ret = psp_tmr_load(psp); + if (ret) { + dev_err(adev->dev, "PSP load tmr failed!\n"); + return ret; } return 0; } -static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, +int amdgpu_psp_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) { switch (ucode->ucode_id) { @@ -2719,6 +2715,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_RLC_DRAM: *type = GFX_FW_TYPE_RLC_DRAM_BOOT; break; + case AMDGPU_UCODE_ID_RLC_IRAM_1: + *type = GFX_FW_TYPE_RLX6_UCODE_CORE1; + break; + case AMDGPU_UCODE_ID_RLC_DRAM_1: + *type = GFX_FW_TYPE_RLX6_DRAM_BOOT_CORE1; + break; case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS; break; @@ -2887,6 +2889,8 @@ static void psp_print_fw_hdr(struct psp_context *psp, amdgpu_ucode_print_gfx_hdr(hdr); break; case AMDGPU_UCODE_ID_RLC_G: + case AMDGPU_UCODE_ID_RLC_DRAM_1: + case AMDGPU_UCODE_ID_RLC_IRAM_1: hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data; amdgpu_ucode_print_rlc_hdr(hdr); break; @@ -2911,10 +2915,9 @@ static int psp_prep_load_ip_fw_cmd_buf(struct psp_context *psp, cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; - ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); + ret = psp_get_fw_type(psp, ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); if (ret) - dev_err(psp->adev->dev, "Unknown firmware type\n"); - + dev_err(psp->adev->dev, "Unknown firmware type %d\n", ucode->ucode_id); return ret; } @@ -3077,7 +3080,11 @@ static int psp_load_non_psp_fw(struct psp_context *psp) amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 11) || amdgpu_ip_version(adev, MP0_HWIP, 0) == - IP_VERSION(11, 0, 12)) && + IP_VERSION(11, 0, 12) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == + IP_VERSION(15, 0, 0) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == + IP_VERSION(15, 0, 8)) && (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3)) @@ -4531,3 +4538,19 @@ const struct amdgpu_ip_block_version psp_v14_0_ip_block = { .rev = 0, .funcs = &psp_ip_funcs, }; + +const struct amdgpu_ip_block_version psp_v15_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 15, + .minor = 0, + .rev = 0, + .funcs = &psp_ip_funcs, +}; + +const struct amdgpu_ip_block_version psp_v15_0_8_ip_block = { + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 15, + .minor = 0, + .rev = 8, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 237b624aa51c..79a49cba8d40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -172,6 +172,8 @@ struct psp_funcs { bool (*is_reload_needed)(struct psp_context *psp); int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val, enum psp_reg_prog_id id); + int (*get_fw_type)(struct amdgpu_firmware_info *ucode, + enum psp_gfx_fw_type *type); }; struct ta_funcs { @@ -524,6 +526,10 @@ struct amdgpu_psp_funcs { ((psp)->funcs->reg_program_no_ring ? \ (psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL) +#define psp_get_fw_type(psp, ucode, type) \ + ((psp)->funcs->get_fw_type ? \ + (psp)->funcs->get_fw_type(ucode, type):amdgpu_psp_get_fw_type(ucode, type)) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; @@ -534,6 +540,8 @@ extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block; extern const struct amdgpu_ip_block_version psp_v14_0_ip_block; +extern const struct amdgpu_ip_block_version psp_v15_0_ip_block; +extern const struct amdgpu_ip_block_version psp_v15_0_8_ip_block; int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, uint32_t flags); @@ -621,6 +629,8 @@ bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev); int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val, enum psp_reg_prog_id id); void amdgpu_psp_debugfs_init(struct amdgpu_device *adev); +int amdgpu_psp_get_fw_type(struct amdgpu_firmware_info *ucode, + enum psp_gfx_fw_type *type); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 2a6cf7963dde..c91529c778ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -237,8 +237,13 @@ static int amdgpu_check_address_validity(struct amdgpu_device *adev, (address >= RAS_UMC_INJECT_ADDR_LIMIT)) return -EFAULT; - count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, + if (amdgpu_uniras_enabled(adev)) + count = amdgpu_ras_mgr_lookup_bad_pages_in_a_row(adev, address, + page_pfns, ARRAY_SIZE(page_pfns)); + else + count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, address, page_pfns, ARRAY_SIZE(page_pfns)); + if (count <= 0) return -EPERM; @@ -1917,8 +1922,6 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, for (i = 0; i < bps_count; i++) { address = ((uint64_t)bps[i].bp) << AMDGPU_GPU_PAGE_SHIFT; - if (amdgpu_ras_check_critical_address(adev, address)) - continue; bps[i].size = AMDGPU_GPU_PAGE_SIZE; @@ -1931,6 +1934,10 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, else bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED; + if ((bps[i].flags != AMDGPU_RAS_RETIRE_PAGE_RESERVED) && + amdgpu_ras_check_critical_address(adev, address)) + bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED; + s += scnprintf(&buf[s], element_size + 1, "0x%08x : 0x%08x : %1s\n", bps[i].bp, @@ -3076,6 +3083,11 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev, struct ras_err_handler_data *data = con->eh_data; for (j = 0; j < count; j++) { + if (!data->space_left && + amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { + return -ENOMEM; + } + if (amdgpu_ras_check_bad_page_unlock(con, bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) { data->count++; @@ -3083,11 +3095,6 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev, continue; } - if (!data->space_left && - amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { - return -ENOMEM; - } - amdgpu_ras_reserve_page(adev, bps[j].retired_page); memcpy(&data->bps[data->count], &(bps[j]), @@ -3249,8 +3256,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, /* deal with retire_unit records a time */ ret = __amdgpu_ras_convert_rec_array_from_rom(adev, &bps[i], &err_data, nps); - if (ret) - con->bad_page_num -= adev->umc.retire_unit; i += (adev->umc.retire_unit - 1); } else { break; @@ -3263,8 +3268,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, for (; i < pages; i++) { ret = __amdgpu_ras_convert_rec_from_rom(adev, &bps[i], &err_data, nps); - if (ret) - con->bad_page_num -= adev->umc.retire_unit; } con->eh_data->count_saved = con->eh_data->count; @@ -4421,10 +4424,10 @@ static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev, return 0; if (amdgpu_ras_query_error_status(adev, &info) != 0) - DRM_WARN("RAS init harvest failure"); + drm_warn(adev_to_drm(adev), "RAS init query failure"); if (amdgpu_ras_reset_error_status(adev, ras_block->block) != 0) - DRM_WARN("RAS init harvest reset failure"); + drm_warn(adev_to_drm(adev), "RAS init harvest reset failure"); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index c596b6df2e2d..600e6bb98af7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -76,8 +76,12 @@ unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type) * @ring: amdgpu_ring structure holding ring information * @ndw: number of dwords to allocate in the ring buffer * - * Allocate @ndw dwords in the ring buffer (all asics). - * Returns 0 on success, error on failure. + * Allocate @ndw dwords in the ring buffer. The number of dwords should be the + * sum of all commands written to the ring. + * + * Returns: + * 0 on success, otherwise -ENOMEM if it tries to allocate more than the + * maximum dword allowed for one submission. */ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) { @@ -123,7 +127,8 @@ static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw) ring->funcs->begin_use(ring); } -/** amdgpu_ring_insert_nop - insert NOP packets +/** + * amdgpu_ring_insert_nop - insert NOP packets * * @ring: amdgpu_ring structure holding ring information * @count: the number of NOP packets to insert @@ -186,7 +191,7 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) uint32_t count; if (ring->count_dw < 0) - DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); + drm_err(adev_to_drm(ring->adev), "writing more dwords to the ring than expected!\n"); /* We pad to match fetch size */ count = ring->funcs->align_mask + 1 - diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 7a27c6c4bb44..87c9df6c2ecf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -62,6 +62,8 @@ enum amdgpu_ring_priority_level { #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) + +/* Ensure the execution in case of preemption or reset */ #define AMDGPU_FENCE_FLAG_EXEC (1 << 3) #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) @@ -144,10 +146,15 @@ struct amdgpu_fence { struct amdgpu_ring *ring; ktime_t start_timestamp; - /* wptr for the fence for resets */ + /* wptr for the total submission for resets */ u64 wptr; /* fence context for resets */ u64 context; + /* has this fence been reemitted */ + unsigned int reemitted; + /* wptr for the fence for the submission */ + u64 fence_wptr_start; + u64 fence_wptr_end; }; extern const struct drm_sched_backend_ops amdgpu_sched_ops; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index 5aa830a02d80..572a60e1b3cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -515,6 +515,40 @@ static void amdgpu_gfx_rlc_init_microcode_v2_4(struct amdgpu_device *adev) } } +static void amdgpu_gfx_rlc_init_microcode_v2_5(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_5 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_5 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.rlc_1_iram_ucode_size_bytes = + le32_to_cpu(rlc_hdr->rlc_1_iram_ucode_size_bytes); + adev->gfx.rlc.rlc_1_iram_ucode = (u8 *)rlc_hdr + + le32_to_cpu(rlc_hdr->rlc_1_iram_ucode_offset_bytes); + adev->gfx.rlc.rlc_1_dram_ucode_size_bytes = + le32_to_cpu(rlc_hdr->rlc_1_dram_ucode_size_bytes); + adev->gfx.rlc.rlc_1_dram_ucode = (u8 *)rlc_hdr + + le32_to_cpu(rlc_hdr->rlc_1_dram_ucode_offset_bytes); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.rlc_1_iram_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM_1]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM_1; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlc_1_iram_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.rlc_1_dram_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM_1]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM_1; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlc_1_dram_ucode_size_bytes, PAGE_SIZE); + } + } +} + int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev, uint16_t version_major, uint16_t version_minor) @@ -545,6 +579,7 @@ int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev, amdgpu_gfx_rlc_init_microcode_v2_3(adev); if (version_minor == 4) amdgpu_gfx_rlc_init_microcode_v2_4(adev); - + if (version_minor == 5) + amdgpu_gfx_rlc_init_microcode_v2_5(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 2ce310b31942..e535534237a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -257,7 +257,8 @@ struct amdgpu_rlc_funcs { void (*stop)(struct amdgpu_device *adev); void (*reset)(struct amdgpu_device *adev); void (*start)(struct amdgpu_device *adev); - void (*update_spm_vmid)(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid); + void (*update_spm_vmid)(struct amdgpu_device *adev, int xcc_id, + struct amdgpu_ring *ring, unsigned vmid); bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg); }; @@ -269,6 +270,15 @@ struct amdgpu_rlcg_reg_access_ctrl { uint32_t grbm_cntl; uint32_t grbm_idx; uint32_t spare_int; + + uint32_t vfi_cmd; + uint32_t vfi_stat; + uint32_t vfi_addr; + uint32_t vfi_data; + uint32_t vfi_grbm_cntl; + uint32_t vfi_grbm_idx; + uint32_t vfi_grbm_cntl_data; + uint32_t vfi_grbm_idx_data; }; struct amdgpu_rlc { @@ -310,6 +320,8 @@ struct amdgpu_rlc { u32 save_restore_list_srm_size_bytes; u32 rlc_iram_ucode_size_bytes; u32 rlc_dram_ucode_size_bytes; + u32 rlc_1_iram_ucode_size_bytes; + u32 rlc_1_dram_ucode_size_bytes; u32 rlcp_ucode_size_bytes; u32 rlcv_ucode_size_bytes; u32 global_tap_delays_ucode_size_bytes; @@ -325,6 +337,8 @@ struct amdgpu_rlc { u8 *save_restore_list_srm; u8 *rlc_iram_ucode; u8 *rlc_dram_ucode; + u8 *rlc_1_iram_ucode; + u8 *rlc_1_dram_ucode; u8 *rlcp_ucode; u8 *rlcv_ucode; u8 *global_tap_delays_ucode; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 34311f32be4c..2bf365609775 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -50,6 +50,11 @@ enum amdgpu_sdma_irq { #define NUM_SDMA(x) hweight32(x) +struct amdgpu_sdma_csa_info { + u32 size; + u32 alignment; +}; + struct amdgpu_sdma_funcs { int (*stop_kernel_queue)(struct amdgpu_ring *ring); int (*start_kernel_queue)(struct amdgpu_ring *ring); @@ -65,7 +70,10 @@ struct amdgpu_sdma_instance { struct amdgpu_ring ring; struct amdgpu_ring page; bool burst_nop; - uint32_t aid_id; + union { + uint32_t aid_id; + uint32_t xcc_id; + }; struct amdgpu_bo *sdma_fw_obj; uint64_t sdma_fw_gpu_addr; @@ -123,7 +131,10 @@ struct amdgpu_sdma { int num_instances; uint32_t sdma_mask; - int num_inst_per_aid; + union { + int num_inst_per_aid; + int num_inst_per_xcc; + }; uint32_t srbm_soft_reset; bool has_page_queue; struct ras_common_if *ras_if; @@ -133,6 +144,8 @@ struct amdgpu_sdma { struct list_head reset_callback_list; bool no_user_submission; bool disable_uq; + void (*get_csa_info)(struct amdgpu_device *adev, + struct amdgpu_sdma_csa_info *csa_info); }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index ec9d12f85f39..124b13a68f3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -27,6 +27,7 @@ enum amdgpu_pkg_type { AMDGPU_PKG_TYPE_APU = 2, AMDGPU_PKG_TYPE_CEM = 3, AMDGPU_PKG_TYPE_OAM = 4, + AMDGPU_PKG_TYPE_BB = 5, AMDGPU_PKG_TYPE_UNKNOWN, }; @@ -44,6 +45,8 @@ struct amdgpu_smuio_funcs { u32 (*get_socket_id)(struct amdgpu_device *adev); enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); + bool (*is_connected_with_ethernet_switch)(struct amdgpu_device *adev); + bool (*is_custom_hbm_supported)(struct amdgpu_device *adev); u64 (*get_gpu_clock_counter)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 2b931e855abd..cfbcce9c27c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -162,13 +162,25 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, *placement = abo->placement; } +static struct dma_fence * +amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job, u32 num_dw) +{ + struct amdgpu_ring *ring; + + ring = adev->mman.buffer_funcs_ring; + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + + return amdgpu_job_submit(job); +} + /** * amdgpu_ttm_map_buffer - Map memory into the GART windows + * @entity: entity to run the window setup job * @bo: buffer object to map * @mem: memory object to map * @mm_cur: range to map * @window: which GART window to use - * @ring: DMA ring to use for the copy * @tmz: if we should setup a TMZ enabled mapping * @size: in number of bytes to map, out number of bytes mapped * @addr: resulting address inside the MC address space @@ -176,13 +188,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, * Setup one of the GART windows to access a specific piece of memory or return * the physical address for local memory. */ -static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, +static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity, + struct ttm_buffer_object *bo, struct ttm_resource *mem, struct amdgpu_res_cursor *mm_cur, - unsigned int window, struct amdgpu_ring *ring, + unsigned int window, bool tmz, uint64_t *size, uint64_t *addr) { - struct amdgpu_device *adev = ring->adev; + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); unsigned int offset, num_pages, num_dw, num_bytes; uint64_t src_addr, dst_addr; struct amdgpu_job *job; @@ -223,7 +236,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE; - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr, + r = amdgpu_job_alloc_with_ib(adev, &entity->base, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4 + num_bytes, AMDGPU_IB_POOL_DELAYED, &job, @@ -239,9 +252,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, num_bytes, 0); - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - WARN_ON(job->ibs[0].length_dw > num_dw); - flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem); if (tmz) flags |= AMDGPU_PTE_TMZ; @@ -259,13 +269,14 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr); } - dma_fence_put(amdgpu_job_submit(job)); + dma_fence_put(amdgpu_ttm_job_submit(adev, job, num_dw)); return 0; } /** * amdgpu_ttm_copy_mem_to_mem - Helper function for copy * @adev: amdgpu device + * @entity: entity to run the jobs * @src: buffer/address where to read from * @dst: buffer/address where to write to * @size: number of bytes to copy @@ -280,13 +291,13 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, */ __attribute__((nonnull)) static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, + struct amdgpu_ttm_buffer_entity *entity, const struct amdgpu_copy_mem *src, const struct amdgpu_copy_mem *dst, uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f) { - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_res_cursor src_mm, dst_mm; struct dma_fence *fence = NULL; int r = 0; @@ -312,13 +323,13 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20); /* Map src to window 0 and dst to window 1. */ - r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm, - 0, ring, tmz, &cur_size, &from); + r = amdgpu_ttm_map_buffer(entity, src->bo, src->mem, &src_mm, + 0, tmz, &cur_size, &from); if (r) goto error; - r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm, - 1, ring, tmz, &cur_size, &to); + r = amdgpu_ttm_map_buffer(entity, dst->bo, dst->mem, &dst_mm, + 1, tmz, &cur_size, &to); if (r) goto error; @@ -345,8 +356,8 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, write_compress_disable)); } - r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, - &next, false, true, copy_flags); + r = amdgpu_copy_buffer(adev, entity, from, to, cur_size, resv, + &next, true, copy_flags); if (r) goto error; @@ -386,7 +397,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, src.offset = 0; dst.offset = 0; - r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, + r = amdgpu_ttm_copy_mem_to_mem(adev, + &adev->mman.move_entity, + &src, &dst, new_mem->size, amdgpu_bo_encrypted(abo), bo->base.resv, &fence); @@ -398,8 +411,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; - r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence, - false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT); + r = amdgpu_fill_buffer(&adev->mman.move_entity, + abo, 0, NULL, &wipe_fence, + AMDGPU_KERNEL_JOB_ID_MOVE_BLIT); if (r) { goto error; } else if (wipe_fence) { @@ -1063,6 +1077,86 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev, } /** + * amdgpu_ttm_mmio_remap_alloc_sgt - build an sg_table for MMIO_REMAP I/O aperture + * @adev: amdgpu device providing the remap BAR base (adev->rmmio_remap.bus_addr) + * @res: TTM resource of the BO to export; expected to live in AMDGPU_PL_MMIO_REMAP + * @dev: importing device to map for (typically @attach->dev in dma-buf paths) + * @dir: DMA data direction for the importer (passed to dma_map_resource()) + * @sgt: output; on success, set to a newly allocated sg_table describing the I/O span + * + * The HDP flush page (AMDGPU_PL_MMIO_REMAP) is a fixed hardware I/O window in a PCI + * BAR—there are no struct pages to back it. Importers still need a DMA address list, + * so we synthesize a minimal sg_table and populate it from dma_map_resource(), not + * from pages. Using the common amdgpu_res_cursor walker keeps the offset/size math + * consistent with other TTM/manager users. + * + * - @res is assumed to be a small, contiguous I/O region (typically a single 4 KiB + * page) in AMDGPU_PL_MMIO_REMAP. Callers should validate placement before calling. + * - The sg entry is created with sg_set_page(sg, NULL, …) to reflect I/O space. + * - The mapping uses DMA_ATTR_SKIP_CPU_SYNC because this is MMIO, not cacheable RAM. + * - Peer reachability / p2pdma policy checks must be done by the caller. + * + * Return: + * * 0 on success, with *@sgt set to a valid table that must be freed via + * amdgpu_ttm_mmio_remap_free_sgt(). + * * -ENOMEM if allocation of the sg_table fails. + * * -EIO if dma_map_resource() fails. + * + */ +int amdgpu_ttm_mmio_remap_alloc_sgt(struct amdgpu_device *adev, + struct ttm_resource *res, + struct device *dev, + enum dma_data_direction dir, + struct sg_table **sgt) +{ + struct amdgpu_res_cursor cur; + dma_addr_t dma; + resource_size_t phys; + struct scatterlist *sg; + int r; + + /* Walk the resource once; MMIO_REMAP is expected to be contiguous+small. */ + amdgpu_res_first(res, 0, res->size, &cur); + + /* Translate byte offset in the remap window into a host physical BAR address. */ + phys = adev->rmmio_remap.bus_addr + cur.start; + + /* Build a single-entry sg_table mapped as I/O (no struct page backing). */ + *sgt = kzalloc(sizeof(**sgt), GFP_KERNEL); + if (!*sgt) + return -ENOMEM; + r = sg_alloc_table(*sgt, 1, GFP_KERNEL); + if (r) { + kfree(*sgt); + return r; + } + sg = (*sgt)->sgl; + sg_set_page(sg, NULL, cur.size, 0); /* WHY: I/O space → no pages */ + + dma = dma_map_resource(dev, phys, cur.size, dir, DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(dev, dma)) { + sg_free_table(*sgt); + kfree(*sgt); + return -EIO; + } + sg_dma_address(sg) = dma; + sg_dma_len(sg) = cur.size; + return 0; +} + +void amdgpu_ttm_mmio_remap_free_sgt(struct device *dev, + enum dma_data_direction dir, + struct sg_table *sgt) +{ + struct scatterlist *sg = sgt->sgl; + + dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), + dir, DMA_ATTR_SKIP_CPU_SYNC); + sg_free_table(sgt); + kfree(sgt); +} + +/** * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO * * @bo: The buffer object to create a GTT ttm_tt object around @@ -1478,7 +1572,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, memcpy(adev->mman.sdma_access_ptr, buf, len); num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr, + r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job, @@ -1497,10 +1591,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, 0); - amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); - WARN_ON(job->ibs[0].length_dw > num_dw); - - fence = amdgpu_job_submit(job); + fence = amdgpu_ttm_job_submit(adev, job, num_dw); mutex_unlock(&adev->mman.gtt_window_lock); if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout)) @@ -1744,7 +1835,13 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))) reserve_size = max(reserve_size, (uint32_t)280 << 20); - else if (!reserve_size) + else if (!adev->bios && + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) { + if (hweight32(adev->aid_mask) == 1) + reserve_size = max(reserve_size, (uint32_t)128 << 20); + else + reserve_size = max(reserve_size, (uint32_t)144 << 20); + } else if (!reserve_size) reserve_size = DISCOVERY_TMR_OFFSET; if (mem_train_support) { @@ -1820,6 +1917,10 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular * GEM object (amdgpu_bo_create). * + * The BO is created as a normal GEM object via amdgpu_bo_create(), then + * reserved and pinned at the TTM level (ttm_bo_pin()) so it can never be + * migrated or evicted. No CPU mapping is established here. + * * Return: * * 0 on success or intentional skip (feature not present/unsupported) * * negative errno on allocation failure @@ -1848,7 +1949,26 @@ static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev) if (r) return r; + r = amdgpu_bo_reserve(adev->rmmio_remap.bo, true); + if (r) + goto err_unref; + + /* + * MMIO_REMAP is a fixed I/O placement (AMDGPU_PL_MMIO_REMAP). + * Use TTM-level pin so the BO cannot be evicted/migrated, + * independent of GEM domains. This + * enforces the “fixed I/O window” + */ + ttm_bo_pin(&adev->rmmio_remap.bo->tbo); + + amdgpu_bo_unreserve(adev->rmmio_remap.bo); return 0; + +err_unref: + if (adev->rmmio_remap.bo) + amdgpu_bo_unref(&adev->rmmio_remap.bo); + adev->rmmio_remap.bo = NULL; + return r; } /** @@ -1860,6 +1980,15 @@ static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev) */ static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev) { + struct amdgpu_bo *bo = adev->rmmio_remap.bo; + + if (!bo) + return; /* <-- safest early exit */ + + if (!amdgpu_bo_reserve(adev->rmmio_remap.bo, true)) { + ttm_bo_unpin(&adev->rmmio_remap.bo->tbo); + amdgpu_bo_unreserve(adev->rmmio_remap.bo); + } amdgpu_bo_unref(&adev->rmmio_remap.bo); adev->rmmio_remap.bo = NULL; } @@ -1988,7 +2117,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n"); } - dev_info(adev->dev, "amdgpu: %uM of VRAM memory ready\n", + dev_info(adev->dev, " %uM of VRAM memory ready\n", (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024))); /* Compute GTT size, either based on TTM limit @@ -2014,7 +2143,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) dev_err(adev->dev, "Failed initializing GTT heap.\n"); return r; } - dev_info(adev->dev, "amdgpu: %uM of GTT memory ready.\n", + dev_info(adev->dev, " %uM of GTT memory ready.\n", (unsigned int)(gtt_size / (1024 * 1024))); if (adev->flags & AMD_IS_APU) { @@ -2077,7 +2206,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) AMDGPU_GEM_DOMAIN_GTT, &adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr)) - DRM_WARN("Debug VRAM access will use slowpath MM access\n"); + drm_warn(adev_to_drm(adev), + "Debug VRAM access will use slowpath MM access\n"); return 0; } @@ -2137,7 +2267,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP); ttm_device_fini(&adev->mman.bdev); adev->mman.initialized = false; - dev_info(adev->dev, "amdgpu: ttm finalized\n"); + dev_info(adev->dev, " ttm finalized\n"); } /** @@ -2165,7 +2295,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) ring = adev->mman.buffer_funcs_ring; sched = &ring->sched; - r = drm_sched_entity_init(&adev->mman.high_pr, + r = drm_sched_entity_init(&adev->mman.default_entity.base, DRM_SCHED_PRIORITY_KERNEL, &sched, 1, NULL); if (r) { @@ -2175,18 +2305,30 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) return; } - r = drm_sched_entity_init(&adev->mman.low_pr, + r = drm_sched_entity_init(&adev->mman.clear_entity.base, + DRM_SCHED_PRIORITY_NORMAL, &sched, + 1, NULL); + if (r) { + dev_err(adev->dev, + "Failed setting up TTM BO clear entity (%d)\n", + r); + goto error_free_entity; + } + + r = drm_sched_entity_init(&adev->mman.move_entity.base, DRM_SCHED_PRIORITY_NORMAL, &sched, 1, NULL); if (r) { dev_err(adev->dev, "Failed setting up TTM BO move entity (%d)\n", r); + drm_sched_entity_destroy(&adev->mman.clear_entity.base); goto error_free_entity; } } else { - drm_sched_entity_destroy(&adev->mman.high_pr); - drm_sched_entity_destroy(&adev->mman.low_pr); + drm_sched_entity_destroy(&adev->mman.default_entity.base); + drm_sched_entity_destroy(&adev->mman.clear_entity.base); + drm_sched_entity_destroy(&adev->mman.move_entity.base); /* Drop all the old fences since re-creating the scheduler entities * will allocate new contexts. */ @@ -2204,24 +2346,20 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) return; error_free_entity: - drm_sched_entity_destroy(&adev->mman.high_pr); + drm_sched_entity_destroy(&adev->mman.default_entity.base); } static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, - bool direct_submit, + struct amdgpu_ttm_buffer_entity *entity, unsigned int num_dw, struct dma_resv *resv, bool vm_needs_flush, struct amdgpu_job **job, - bool delayed, u64 k_job_id) + u64 k_job_id) { - enum amdgpu_ib_pool_type pool = direct_submit ? - AMDGPU_IB_POOL_DIRECT : - AMDGPU_IB_POOL_DELAYED; + enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED; int r; - struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr : - &adev->mman.high_pr; - r = amdgpu_job_alloc_with_ib(adev, entity, + r = amdgpu_job_alloc_with_ib(adev, &entity->base, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4, pool, job, k_job_id); if (r) @@ -2240,20 +2378,24 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, DMA_RESV_USAGE_BOOKKEEP); } -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, +int amdgpu_copy_buffer(struct amdgpu_device *adev, + struct amdgpu_ttm_buffer_entity *entity, + uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, - struct dma_fence **fence, bool direct_submit, + struct dma_fence **fence, bool vm_needs_flush, uint32_t copy_flags) { - struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; + struct amdgpu_ring *ring; struct amdgpu_job *job; uint32_t max_bytes; unsigned int i; int r; - if (!direct_submit && !ring->sched.ready) { + ring = adev->mman.buffer_funcs_ring; + + if (!ring->sched.ready) { dev_err(adev->dev, "Trying to move memory with ring turned off.\n"); return -EINVAL; @@ -2262,11 +2404,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); - r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw, - resv, vm_needs_flush, &job, false, + r = amdgpu_ttm_prepare_job(adev, entity, num_dw, + resv, vm_needs_flush, &job, AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER); if (r) - return r; + goto error_free; for (i = 0; i < num_loops; i++) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); @@ -2278,16 +2420,9 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, byte_count -= cur_size_in_bytes; } - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - WARN_ON(job->ibs[0].length_dw > num_dw); - if (direct_submit) - r = amdgpu_job_submit_direct(job, ring, fence); - else - *fence = amdgpu_job_submit(job); - if (r) - goto error_free; + *fence = amdgpu_ttm_job_submit(adev, job, num_dw); - return r; + return 0; error_free: amdgpu_job_free(job); @@ -2295,14 +2430,15 @@ error_free: return r; } -static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, +static int amdgpu_ttm_fill_mem(struct amdgpu_device *adev, + struct amdgpu_ttm_buffer_entity *entity, + uint32_t src_data, uint64_t dst_addr, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, - bool vm_needs_flush, bool delayed, + bool vm_needs_flush, u64 k_job_id) { - struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; struct amdgpu_job *job; uint32_t max_bytes; @@ -2312,8 +2448,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, max_bytes = adev->mman.buffer_funcs->fill_max_bytes; num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); - r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush, - &job, delayed, k_job_id); + r = amdgpu_ttm_prepare_job(adev, entity, num_dw, resv, + vm_needs_flush, &job, k_job_id); if (r) return r; @@ -2327,9 +2463,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, byte_count -= cur_size; } - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - WARN_ON(job->ibs[0].length_dw > num_dw); - *fence = amdgpu_job_submit(job); + *fence = amdgpu_ttm_job_submit(adev, job, num_dw); return 0; } @@ -2349,7 +2483,6 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_res_cursor cursor; u64 addr; int r = 0; @@ -2377,13 +2510,14 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, /* Never clear more than 256MiB at once to avoid timeouts */ size = min(cursor.size, 256ULL << 20); - r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor, - 1, ring, false, &size, &addr); + r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity, + &bo->tbo, bo->tbo.resource, &cursor, + 1, false, &size, &addr); if (r) goto err; - r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, - &next, true, true, + r = amdgpu_ttm_fill_mem(adev, &adev->mman.clear_entity, 0, addr, size, resv, + &next, true, AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER); if (r) goto err; @@ -2399,15 +2533,14 @@ err: return r; } -int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, - struct dma_resv *resv, - struct dma_fence **f, - bool delayed, - u64 k_job_id) +int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity, + struct amdgpu_bo *bo, + uint32_t src_data, + struct dma_resv *resv, + struct dma_fence **f, + u64 k_job_id) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct dma_fence *fence = NULL; struct amdgpu_res_cursor dst; int r; @@ -2428,13 +2561,14 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, /* Never fill more than 256MiB at once to avoid timeouts */ cur_size = min(dst.size, 256ULL << 20); - r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst, - 1, ring, false, &cur_size, &to); + r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst, + 1, false, &cur_size, &to); if (r) goto error; - r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv, - &next, true, delayed, k_job_id); + r = amdgpu_ttm_fill_mem(adev, entity, + src_data, to, cur_size, resv, + &next, true, k_job_id); if (r) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 577ee04ce0bf..143201ecea3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -52,6 +52,10 @@ struct amdgpu_gtt_mgr { spinlock_t lock; }; +struct amdgpu_ttm_buffer_entity { + struct drm_sched_entity base; +}; + struct amdgpu_mman { struct ttm_device bdev; struct ttm_pool *ttm_pools; @@ -64,10 +68,10 @@ struct amdgpu_mman { bool buffer_funcs_enabled; struct mutex gtt_window_lock; - /* High priority scheduler entity for buffer moves */ - struct drm_sched_entity high_pr; - /* Low priority scheduler entity for VRAM clearing */ - struct drm_sched_entity low_pr; + + struct amdgpu_ttm_buffer_entity default_entity; + struct amdgpu_ttm_buffer_entity clear_entity; + struct amdgpu_ttm_buffer_entity move_entity; struct amdgpu_vram_mgr vram_mgr; struct amdgpu_gtt_mgr gtt_mgr; @@ -137,6 +141,12 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev); bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem); void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr); +int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr, + struct drm_mm_node *mm_node, + u64 num_pages, + enum drm_mm_insert_mode mode); +void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr, + struct drm_mm_node *mm_node); uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man); u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo); @@ -163,20 +173,22 @@ int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable); -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, +int amdgpu_copy_buffer(struct amdgpu_device *adev, + struct amdgpu_ttm_buffer_entity *entity, + uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, - struct dma_fence **fence, bool direct_submit, + struct dma_fence **fence, bool vm_needs_flush, uint32_t copy_flags); int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, struct dma_resv *resv, struct dma_fence **fence); -int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, - struct dma_resv *resv, - struct dma_fence **fence, - bool delayed, - u64 k_job_id); +int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity, + struct amdgpu_bo *bo, + uint32_t src_data, + struct dma_resv *resv, + struct dma_fence **f, + u64 k_job_id); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); @@ -213,4 +225,13 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type); void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev); +int amdgpu_ttm_mmio_remap_alloc_sgt(struct amdgpu_device *adev, + struct ttm_resource *res, + struct device *dev, + enum dma_data_direction dir, + struct sg_table **sgt); +void amdgpu_ttm_mmio_remap_free_sgt(struct device *dev, + enum dma_data_direction dir, + struct sg_table *sgt); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index e96f24e9ad57..1ab61e7b35db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -166,6 +166,8 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr) container_of(rlc_hdr_v2_2, struct rlc_firmware_header_v2_3, v2_2); const struct rlc_firmware_header_v2_4 *rlc_hdr_v2_4 = container_of(rlc_hdr_v2_3, struct rlc_firmware_header_v2_4, v2_3); + const struct rlc_firmware_header_v2_5 *rlc_hdr_v2_5 = + container_of(rlc_hdr_v2_2, struct rlc_firmware_header_v2_5, v2_2); switch (version_minor) { case 0: @@ -287,6 +289,26 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr) DRM_DEBUG("se3_tap_delays_ucode_offset_bytes: %u\n", le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_offset_bytes)); break; + case 5: + /* rlc_hdr v2_5 */ + DRM_INFO("rlc_iram_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_iram_ucode_size_bytes)); + DRM_INFO("rlc_iram_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_iram_ucode_offset_bytes)); + DRM_INFO("rlc_dram_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_dram_ucode_size_bytes)); + DRM_INFO("rlc_dram_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_dram_ucode_offset_bytes)); + /* rlc_hdr v2_5 */ + DRM_INFO("rlc_1_iram_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_5->rlc_1_iram_ucode_size_bytes)); + DRM_INFO("rlc_1_iram_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_5->rlc_1_iram_ucode_offset_bytes)); + DRM_INFO("rlc_1_dram_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_5->rlc_1_dram_ucode_size_bytes)); + DRM_INFO("rlc_1_dram_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_5->rlc_1_dram_ucode_offset_bytes)); + break; default: DRM_ERROR("Unknown RLC v2 ucode: v2.%u\n", version_minor); break; @@ -631,6 +653,10 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "RLC_IRAM"; case AMDGPU_UCODE_ID_RLC_DRAM: return "RLC_DRAM"; + case AMDGPU_UCODE_ID_RLC_IRAM_1: + return "RLC_IRAM_1"; + case AMDGPU_UCODE_ID_RLC_DRAM_1: + return "RLC_DRAM_1"; case AMDGPU_UCODE_ID_RLC_G: return "RLC_G"; case AMDGPU_UCODE_ID_RLC_P: @@ -911,6 +937,14 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_size = adev->gfx.rlc.rlc_dram_ucode_size_bytes; ucode_addr = adev->gfx.rlc.rlc_dram_ucode; break; + case AMDGPU_UCODE_ID_RLC_IRAM_1: + ucode->ucode_size = adev->gfx.rlc.rlc_1_iram_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.rlc_1_iram_ucode; + break; + case AMDGPU_UCODE_ID_RLC_DRAM_1: + ucode->ucode_size = adev->gfx.rlc.rlc_1_dram_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.rlc_1_dram_ucode; + break; case AMDGPU_UCODE_ID_RLC_P: ucode->ucode_size = adev->gfx.rlc.rlcp_ucode_size_bytes; ucode_addr = adev->gfx.rlc.rlcp_ucode; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 6349aad6da35..f316776fe950 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -300,6 +300,15 @@ struct rlc_firmware_header_v2_4 { uint32_t se3_tap_delays_ucode_offset_bytes; }; +/* version_major=2, version_minor=5 */ +struct rlc_firmware_header_v2_5 { + struct rlc_firmware_header_v2_2 v2_2; + uint32_t rlc_1_iram_ucode_size_bytes; + uint32_t rlc_1_iram_ucode_offset_bytes; + uint32_t rlc_1_dram_ucode_size_bytes; + uint32_t rlc_1_dram_ucode_offset_bytes; +}; + /* version_major=1, version_minor=0 */ struct sdma_firmware_header_v1_0 { struct common_firmware_header header; @@ -449,6 +458,7 @@ union amdgpu_firmware_header { struct rlc_firmware_header_v2_2 rlc_v2_2; struct rlc_firmware_header_v2_3 rlc_v2_3; struct rlc_firmware_header_v2_4 rlc_v2_4; + struct rlc_firmware_header_v2_5 rlc_v2_5; struct sdma_firmware_header_v1_0 sdma; struct sdma_firmware_header_v1_1 sdma_v1_1; struct sdma_firmware_header_v2_0 sdma_v2_0; @@ -512,6 +522,8 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, AMDGPU_UCODE_ID_RLC_IRAM, AMDGPU_UCODE_ID_RLC_DRAM, + AMDGPU_UCODE_ID_RLC_IRAM_1, + AMDGPU_UCODE_ID_RLC_DRAM_1, AMDGPU_UCODE_ID_RLC_P, AMDGPU_UCODE_ID_RLC_V, AMDGPU_UCODE_ID_RLC_G, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 9a969175900e..c5dd5815056c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -166,7 +166,8 @@ static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue, return 0; } -int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue, +int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *queue, u64 addr, u64 expected_size) { struct amdgpu_bo_va_mapping *va_map; @@ -271,10 +272,9 @@ err: return r; } -static int -amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; @@ -282,7 +282,7 @@ amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr, int r = 0; if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { - r = userq_funcs->preempt(uq_mgr, queue); + r = userq_funcs->preempt(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; found_hung_queue = true; @@ -297,17 +297,16 @@ amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr, return r; } -static int -amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static int amdgpu_userq_restore_helper(struct amdgpu_usermode_queue *queue) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; int r = 0; if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) { - r = userq_funcs->restore(uq_mgr, queue); + r = userq_funcs->restore(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; } else { @@ -318,10 +317,9 @@ amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr, return r; } -static int -amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static int amdgpu_userq_unmap_helper(struct amdgpu_usermode_queue *queue) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; @@ -330,7 +328,7 @@ amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) || (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { - r = userq_funcs->unmap(uq_mgr, queue); + r = userq_funcs->unmap(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; found_hung_queue = true; @@ -345,17 +343,16 @@ amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, return r; } -static int -amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; int r = 0; if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) { - r = userq_funcs->map(uq_mgr, queue); + r = userq_funcs->map(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; amdgpu_userq_detect_and_reset_queues(uq_mgr); @@ -367,10 +364,9 @@ amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr, return r; } -static int -amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static int amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct dma_fence *f = queue->last_fence; int ret = 0; @@ -387,11 +383,10 @@ amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr, return ret; } -static void -amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue, - int queue_id) +static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue, + int queue_id) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; @@ -400,10 +395,10 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, /* Drop the userq reference. */ amdgpu_userq_buffer_vas_list_cleanup(adev, queue); - uq_funcs->mqd_destroy(uq_mgr, queue); + uq_funcs->mqd_destroy(queue); amdgpu_userq_fence_driver_free(queue); /* Use interrupt-safe locking since IRQ handlers may access these XArrays */ - xa_erase_irq(&uq_mgr->userq_mgr_xa, (unsigned long)queue_id); + xa_erase_irq(&uq_mgr->userq_xa, (unsigned long)queue_id); xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index); queue->userq_mgr = NULL; list_del(&queue->userq_va_list); @@ -415,7 +410,7 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, static struct amdgpu_usermode_queue * amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) { - return xa_load(&uq_mgr->userq_mgr_xa, qid); + return xa_load(&uq_mgr->userq_xa, qid); } void @@ -584,25 +579,33 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) mutex_unlock(&uq_mgr->userq_mutex); return -EINVAL; } - amdgpu_userq_wait_for_last_fence(uq_mgr, queue); + amdgpu_userq_wait_for_last_fence(queue); r = amdgpu_bo_reserve(queue->db_obj.obj, true); if (!r) { amdgpu_bo_unpin(queue->db_obj.obj); amdgpu_bo_unreserve(queue->db_obj.obj); } amdgpu_bo_unref(&queue->db_obj.obj); + + r = amdgpu_bo_reserve(queue->wptr_obj.obj, true); + if (!r) { + amdgpu_bo_unpin(queue->wptr_obj.obj); + amdgpu_bo_unreserve(queue->wptr_obj.obj); + } + amdgpu_bo_unref(&queue->wptr_obj.obj); + atomic_dec(&uq_mgr->userq_count[queue->queue_type]); #if defined(CONFIG_DEBUG_FS) debugfs_remove_recursive(queue->debugfs_queue); #endif amdgpu_userq_detect_and_reset_queues(uq_mgr); - r = amdgpu_userq_unmap_helper(uq_mgr, queue); + r = amdgpu_userq_unmap_helper(queue); /*TODO: It requires a reset for userq hw unmap error*/ if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) { drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n"); queue->state = AMDGPU_USERQ_STATE_HUNG; } - amdgpu_userq_cleanup(uq_mgr, queue, queue_id); + amdgpu_userq_cleanup(queue, queue_id); mutex_unlock(&uq_mgr->userq_mutex); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -729,10 +732,11 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) db_info.db_obj = &queue->db_obj; db_info.doorbell_offset = args->in.doorbell_offset; + queue->userq_mgr = uq_mgr; /* Validate the userq virtual address.*/ - if (amdgpu_userq_input_va_validate(queue, args->in.queue_va, args->in.queue_size) || - amdgpu_userq_input_va_validate(queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || - amdgpu_userq_input_va_validate(queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { + if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, args->in.queue_size) || + amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || + amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { r = -EINVAL; kfree(queue); goto unlock; @@ -755,7 +759,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } - r = uq_funcs->mqd_create(uq_mgr, &args->in, queue); + r = uq_funcs->mqd_create(queue, &args->in); if (r) { drm_file_err(uq_mgr->file, "Failed to create Queue\n"); amdgpu_userq_fence_driver_free(queue); @@ -772,18 +776,18 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } - r = xa_alloc(&uq_mgr->userq_mgr_xa, &qid, queue, XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL); + r = xa_alloc(&uq_mgr->userq_xa, &qid, queue, + XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL); if (r) { drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n"); amdgpu_userq_fence_driver_free(queue); - uq_funcs->mqd_destroy(uq_mgr, queue); + uq_funcs->mqd_destroy(queue); kfree(queue); r = -ENOMEM; up_read(&adev->reset_domain->sem); goto unlock; } up_read(&adev->reset_domain->sem); - queue->userq_mgr = uq_mgr; /* don't map the queue if scheduling is halted */ if (adev->userq_halt_for_enforce_isolation && @@ -793,12 +797,12 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) else skip_map_queue = false; if (!skip_map_queue) { - r = amdgpu_userq_map_helper(uq_mgr, queue); + r = amdgpu_userq_map_helper(queue); if (r) { drm_file_err(uq_mgr->file, "Failed to map Queue\n"); - xa_erase(&uq_mgr->userq_mgr_xa, qid); + xa_erase(&uq_mgr->userq_xa, qid); amdgpu_userq_fence_driver_free(queue); - uq_funcs->mqd_destroy(uq_mgr, queue); + uq_funcs->mqd_destroy(queue); kfree(queue); goto unlock; } @@ -923,8 +927,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) int ret = 0, r; /* Resume all the queues for this process */ - xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) { - + xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { if (!amdgpu_userq_buffer_vas_mapped(queue)) { drm_file_err(uq_mgr->file, "trying restore queue without va mapping\n"); @@ -932,7 +935,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) continue; } - r = amdgpu_userq_restore_helper(uq_mgr, queue); + r = amdgpu_userq_restore_helper(queue); if (r) ret = r; } @@ -1167,8 +1170,8 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) amdgpu_userq_detect_and_reset_queues(uq_mgr); /* Try to unmap all the queues in this process ctx */ - xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) { - r = amdgpu_userq_preempt_helper(uq_mgr, queue); + xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { + r = amdgpu_userq_preempt_helper(queue); if (r) ret = r; } @@ -1202,7 +1205,7 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) unsigned long queue_id; int ret; - xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) { + xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { struct dma_fence *f = queue->last_fence; if (!f || dma_fence_is_signaled(f)) @@ -1252,7 +1255,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f struct amdgpu_device *adev) { mutex_init(&userq_mgr->userq_mutex); - xa_init_flags(&userq_mgr->userq_mgr_xa, XA_FLAGS_ALLOC); + xa_init_flags(&userq_mgr->userq_xa, XA_FLAGS_ALLOC); userq_mgr->adev = adev; userq_mgr->file = file_priv; @@ -1269,13 +1272,13 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) mutex_lock(&userq_mgr->userq_mutex); amdgpu_userq_detect_and_reset_queues(userq_mgr); - xa_for_each(&userq_mgr->userq_mgr_xa, queue_id, queue) { - amdgpu_userq_wait_for_last_fence(userq_mgr, queue); - amdgpu_userq_unmap_helper(userq_mgr, queue); - amdgpu_userq_cleanup(userq_mgr, queue, queue_id); + xa_for_each(&userq_mgr->userq_xa, queue_id, queue) { + amdgpu_userq_wait_for_last_fence(queue); + amdgpu_userq_unmap_helper(queue); + amdgpu_userq_cleanup(queue, queue_id); } - xa_destroy(&userq_mgr->userq_mgr_xa); + xa_destroy(&userq_mgr->userq_xa); mutex_unlock(&userq_mgr->userq_mutex); mutex_destroy(&userq_mgr->userq_mutex); } @@ -1297,9 +1300,9 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev) guard(mutex)(&uqm->userq_mutex); amdgpu_userq_detect_and_reset_queues(uqm); if (adev->in_s0ix) - r = amdgpu_userq_preempt_helper(uqm, queue); + r = amdgpu_userq_preempt_helper(queue); else - r = amdgpu_userq_unmap_helper(uqm, queue); + r = amdgpu_userq_unmap_helper(queue); if (r) return r; } @@ -1321,9 +1324,9 @@ int amdgpu_userq_resume(struct amdgpu_device *adev) uqm = queue->userq_mgr; guard(mutex)(&uqm->userq_mutex); if (adev->in_s0ix) - r = amdgpu_userq_restore_helper(uqm, queue); + r = amdgpu_userq_restore_helper(queue); else - r = amdgpu_userq_map_helper(uqm, queue); + r = amdgpu_userq_map_helper(queue); if (r) return r; } @@ -1355,7 +1358,7 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && (queue->xcp_id == idx)) { amdgpu_userq_detect_and_reset_queues(uqm); - r = amdgpu_userq_preempt_helper(uqm, queue); + r = amdgpu_userq_preempt_helper(queue); if (r) ret = r; } @@ -1387,9 +1390,9 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, if (((queue->queue_type == AMDGPU_HW_IP_GFX) || (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && (queue->xcp_id == idx)) { - r = amdgpu_userq_restore_helper(uqm, queue); - if (r) - ret = r; + r = amdgpu_userq_restore_helper(queue); + if (r) + ret = r; } mutex_unlock(&uqm->userq_mutex); } @@ -1439,9 +1442,9 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev) uqm = queue->userq_mgr; cancel_delayed_work_sync(&uqm->resume_work); if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { - amdgpu_userq_wait_for_last_fence(uqm, queue); + amdgpu_userq_wait_for_last_fence(queue); userq_funcs = adev->userq_funcs[queue->queue_type]; - userq_funcs->unmap(uqm, queue); + userq_funcs->unmap(queue); /* just mark all queues as hung at this point. * if unmap succeeds, we could map again * in amdgpu_userq_post_reset() if vram is not lost @@ -1458,18 +1461,16 @@ int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost) * at this point, we should be able to map it again * and continue if vram is not lost. */ - struct amdgpu_userq_mgr *uqm; struct amdgpu_usermode_queue *queue; const struct amdgpu_userq_funcs *userq_funcs; unsigned long queue_id; int r = 0; xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) { - uqm = queue->userq_mgr; if (queue->state == AMDGPU_USERQ_STATE_HUNG && !vram_lost) { userq_funcs = adev->userq_funcs[queue->queue_type]; /* Re-map queue */ - r = userq_funcs->map(uqm, queue); + r = userq_funcs->map(queue); if (r) { dev_err(adev->dev, "Failed to remap queue %ld\n", queue_id); continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index c37444427a14..1eaa94f8a291 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -77,19 +77,13 @@ struct amdgpu_usermode_queue { }; struct amdgpu_userq_funcs { - int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr, - struct drm_amdgpu_userq_in *args, - struct amdgpu_usermode_queue *queue); - void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *uq); - int (*unmap)(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue); - int (*map)(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue); - int (*preempt)(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue); - int (*restore)(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue); + int (*mqd_create)(struct amdgpu_usermode_queue *queue, + struct drm_amdgpu_userq_in *args); + void (*mqd_destroy)(struct amdgpu_usermode_queue *uq); + int (*unmap)(struct amdgpu_usermode_queue *queue); + int (*map)(struct amdgpu_usermode_queue *queue); + int (*preempt)(struct amdgpu_usermode_queue *queue); + int (*restore)(struct amdgpu_usermode_queue *queue); int (*detect_and_reset)(struct amdgpu_device *adev, int queue_type); }; @@ -97,11 +91,11 @@ struct amdgpu_userq_funcs { /* Usermode queues for gfx */ struct amdgpu_userq_mgr { /** - * @userq_mgr_xa: Per-process user queue map (queue ID → queue) + * @userq_xa: Per-process user queue map (queue ID → queue) * Key: queue_id (unique ID within the process's userq manager) * Value: struct amdgpu_usermode_queue */ - struct xarray userq_mgr_xa; + struct xarray userq_xa; struct mutex userq_mutex; struct amdgpu_device *adev; struct delayed_work resume_work; @@ -153,7 +147,8 @@ void amdgpu_userq_reset_work(struct work_struct *work); void amdgpu_userq_pre_reset(struct amdgpu_device *adev); int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost); -int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue, +int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *queue, u64 addr, u64 expected_size); int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index eba9fb359047..25f178536469 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -352,6 +352,7 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = { /** * amdgpu_userq_fence_read_wptr - Read the userq wptr value * + * @adev: amdgpu_device pointer * @queue: user mode queue structure pointer * @wptr: write pointer value * @@ -361,7 +362,8 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = { * * Returns wptr value on success, error on failure. */ -static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue, +static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *queue, u64 *wptr) { struct amdgpu_bo_va_mapping *mapping; @@ -455,6 +457,7 @@ amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq) int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { + struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; struct drm_amdgpu_userq_signal *args = data; @@ -539,13 +542,13 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } /* Retrieve the user queue */ - queue = xa_load(&userq_mgr->userq_mgr_xa, args->queue_id); + queue = xa_load(&userq_mgr->userq_xa, args->queue_id); if (!queue) { r = -ENOENT; goto put_gobj_write; } - r = amdgpu_userq_fence_read_wptr(queue, &wptr); + r = amdgpu_userq_fence_read_wptr(adev, queue, &wptr); if (r) goto put_gobj_write; @@ -901,7 +904,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, */ num_fences = dma_fence_dedup_array(fences, num_fences); - waitq = xa_load(&userq_mgr->userq_mgr_xa, wait_info->waitq_id); + waitq = xa_load(&userq_mgr->userq_xa, wait_info->waitq_id); if (!waitq) { r = -EINVAL; goto free_fences; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 5c38f0d30c87..9d5cca7da1d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -279,7 +279,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; - DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n", + drm_info(adev_to_drm(adev), "Found UVD firmware Version: %u.%u Family ID: %u\n", version_major, version_minor, family_id); /* @@ -306,7 +306,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f; enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3; - DRM_INFO("Found UVD firmware ENC: %u.%u DEC: .%u Family ID: %u\n", + drm_info(adev_to_drm(adev), "Found UVD firmware ENC: %u.%u DEC: .%u Family ID: %u\n", enc_major, enc_minor, dec_minor, family_id); adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; @@ -467,7 +467,8 @@ int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev) int amdgpu_uvd_suspend(struct amdgpu_device *adev) { if (amdgpu_ras_intr_triggered()) - DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); + drm_warn(adev_to_drm(adev), + "UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 5e0786ea911b..75ae9b429420 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,7 @@ #define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin" #define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin" #define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin" +#define FIRMWARE_VCN5_3_0 "amdgpu/vcn_5_3_0.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -90,6 +91,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_6); MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1); MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); MODULE_FIRMWARE(FIRMWARE_VCN5_0_1); +MODULE_FIRMWARE(FIRMWARE_VCN5_3_0); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 47a6ce4fdc74..f8eac92a2b36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -37,6 +37,7 @@ #include "vi.h" #include "soc15.h" #include "nv.h" +#include "amdgpu_virt_ras_cmd.h" #define POPULATE_UCODE_INFO(vf2pf_info, ucode, ver) \ do { \ @@ -1337,6 +1338,133 @@ bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, return ret; } +static u32 amdgpu_virt_rlcg_vfi_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id) +{ + uint32_t timeout = 100; + uint32_t i; + + struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; + void *vfi_cmd; + void *vfi_stat; + void *vfi_addr; + void *vfi_data; + void *vfi_grbm_cntl; + void *vfi_grbm_idx; + uint32_t cmd; + uint32_t stat; + uint32_t addr = offset; + uint32_t data; + uint32_t grbm_cntl_data; + uint32_t grbm_idx_data; + + unsigned long flags; + bool is_err = true; + + if (!adev->gfx.rlc.rlcg_reg_access_supported) { + dev_err(adev->dev, "VFi interface is not available\n"); + return 0; + } + + if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) { + dev_err(adev->dev, "VFi invalid XCC, xcc_id=0x%x\n", xcc_id); + return 0; + } + + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id]; + vfi_cmd = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_cmd; + vfi_stat = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_stat; + vfi_addr = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_addr; + vfi_data = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_data; + vfi_grbm_cntl = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_grbm_cntl; + vfi_grbm_idx = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_grbm_idx; + grbm_cntl_data = reg_access_ctrl->vfi_grbm_cntl_data; + grbm_idx_data = reg_access_ctrl->vfi_grbm_idx_data; + + if (flag == AMDGPU_RLCG_GC_WRITE) { + data = v; + cmd = AMDGPU_RLCG_VFI_CMD__WR; + + // the GRBM_GFX_CNTL and GRBM_GFX_INDEX are protected by mutex outside this call + if (addr == reg_access_ctrl->grbm_cntl) { + reg_access_ctrl->vfi_grbm_cntl_data = data; + return 0; + } else if (addr == reg_access_ctrl->grbm_idx) { + reg_access_ctrl->vfi_grbm_idx_data = data; + return 0; + } + + } else if (flag == AMDGPU_RLCG_GC_READ) { + data = 0; + cmd = AMDGPU_RLCG_VFI_CMD__RD; + + // the GRBM_GFX_CNTL and GRBM_GFX_INDEX are protected by mutex outside this call + if (addr == reg_access_ctrl->grbm_cntl) + return grbm_cntl_data; + else if (addr == reg_access_ctrl->grbm_idx) + return grbm_idx_data; + + } else { + dev_err(adev->dev, "VFi invalid access, flag=0x%x\n", flag); + return 0; + } + + spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags); + + writel(addr, vfi_addr); + writel(data, vfi_data); + writel(grbm_cntl_data, vfi_grbm_cntl); + writel(grbm_idx_data, vfi_grbm_idx); + + writel(AMDGPU_RLCG_VFI_STAT__BUSY, vfi_stat); + writel(cmd, vfi_cmd); + + for (i = 0; i < timeout; i++) { + stat = readl(vfi_stat); + if (stat != AMDGPU_RLCG_VFI_STAT__BUSY) + break; + udelay(10); + } + + switch (stat) { + case AMDGPU_RLCG_VFI_STAT__DONE: + is_err = false; + if (cmd == AMDGPU_RLCG_VFI_CMD__RD) + data = readl(vfi_data); + break; + case AMDGPU_RLCG_VFI_STAT__BUSY: + dev_err(adev->dev, "VFi access timeout\n"); + break; + case AMDGPU_RLCG_VFI_STAT__INV_CMD: + dev_err(adev->dev, "VFi invalid command\n"); + break; + case AMDGPU_RLCG_VFI_STAT__INV_ADDR: + dev_err(adev->dev, "VFi invalid address\n"); + break; + case AMDGPU_RLCG_VFI_STAT__ERR: + dev_err(adev->dev, "VFi unknown error\n"); + break; + default: + dev_err(adev->dev, "VFi unknown status code\n"); + break; + } + + spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags); + + if (is_err) + dev_err(adev->dev, "VFi: [grbm_cntl=0x%x grbm_idx=0x%x] addr=0x%x (byte addr 0x%x), data=0x%x, cmd=0x%x\n", + grbm_cntl_data, grbm_idx_data, + addr, addr * 4, data, cmd); + else + dev_dbg(adev->dev, "VFi: [grbm_cntl=0x%x grbm_idx=0x%x] addr=0x%x (byte addr 0x%x), data=0x%x, cmd=0x%x\n", + grbm_cntl_data, grbm_idx_data, + addr, addr * 4, data, cmd); + + return data; +} + u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id) { struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; @@ -1350,6 +1478,9 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f void *spare_int; unsigned long flags; + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0)) + return amdgpu_virt_rlcg_vfi_reg_rw(adev, offset, v, flag, xcc_id); + if (!adev->gfx.rlc.rlcg_reg_access_supported) { dev_err(adev->dev, "indirect registers access through rlcg is not available\n"); @@ -1533,6 +1664,9 @@ bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev) if (adev->virt.ras_en_caps.bits.poison_propogation_mode) con->poison_supported = true; /* Poison is handled by host */ + if (adev->virt.ras_en_caps.bits.uniras_supported) + amdgpu_virt_ras_set_remote_uniras(adev, true); + return true; } @@ -1845,3 +1979,28 @@ int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, b return r; } + +static int req_remote_ras_cmd(struct amdgpu_device *adev, + u32 param1, u32 param2, u32 param3) +{ + struct amdgpu_virt *virt = &adev->virt; + + if (virt->ops && virt->ops->req_remote_ras_cmd) + return virt->ops->req_remote_ras_cmd(adev, param1, param2, param3); + return -ENOENT; +} + +int amdgpu_virt_send_remote_ras_cmd(struct amdgpu_device *adev, + uint64_t buf, uint32_t buf_len) +{ + uint64_t gpa = buf; + int ret = -EIO; + + if (down_read_trylock(&adev->reset_domain->sem)) { + ret = req_remote_ras_cmd(adev, + lower_32_bits(gpa), upper_32_bits(gpa), buf_len); + up_read(&adev->reset_domain->sem); + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 01d5bca2dee1..886fbce0bfd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -47,6 +47,15 @@ #define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF #define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000 +#define AMDGPU_RLCG_VFI_CMD__WR 0x0 +#define AMDGPU_RLCG_VFI_CMD__RD 0x1 + +#define AMDGPU_RLCG_VFI_STAT__BUSY 0x0 +#define AMDGPU_RLCG_VFI_STAT__DONE 0x1 +#define AMDGPU_RLCG_VFI_STAT__INV_CMD 0x2 +#define AMDGPU_RLCG_VFI_STAT__INV_ADDR 0x3 +#define AMDGPU_RLCG_VFI_STAT__ERR 0xFF + /* all asic after AI use this offset */ #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5 /* tonga/fiji use this offset */ @@ -105,6 +114,8 @@ struct amdgpu_virt_ops { int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr); int (*req_bad_pages)(struct amdgpu_device *adev); int (*req_ras_chk_criti)(struct amdgpu_device *adev, u64 addr); + int (*req_remote_ras_cmd)(struct amdgpu_device *adev, + u32 param1, u32 param2, u32 param3); }; /* @@ -483,4 +494,6 @@ bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, enum amdgpu_ras_block block); void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev); int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit); +int amdgpu_virt_send_remote_ras_cmd(struct amdgpu_device *adev, + uint64_t buf, uint32_t buf_len); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 79bad9cbe2ab..e548dc9708a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -53,7 +53,9 @@ static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer) ret_overrun = hrtimer_forward_now(&amdgpu_crtc->vblank_timer, output->period_ns); if (ret_overrun != 1) - DRM_WARN("%s: vblank timer overrun\n", __func__); + drm_warn(amdgpu_crtc->base.dev, + "%s: vblank timer overrun count: %llu\n", + __func__, ret_overrun); ret = drm_crtc_handle_vblank(crtc); /* Don't queue timer again when vblank is disabled. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c362d4dfb5bb..0eccb31793ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -834,7 +834,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); + adev->gfx.rlc.funcs->update_spm_vmid(adev, ring->xcc_id, ring, job->vmid); if (ring->funcs->emit_gds_switch && gds_switch_needed) { @@ -2362,9 +2362,26 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, unsigned max_bits) { unsigned int max_size = 1 << (max_bits - 30); + bool sys_5level_pgtable = false; unsigned int vm_size; uint64_t tmp; +#ifdef CONFIG_X86_64 + /* + * Refer to function configure_5level_paging() for details. + */ + sys_5level_pgtable = (native_read_cr4() & X86_CR4_LA57); +#endif + + /* + * If GPU supports 5-level page table, but system uses 4-level page table, + * then use 4-level page table on GPU + */ + if (max_level == 4 && !sys_5level_pgtable) { + min_vm_size = 256 * 1024; + max_level = 3; + } + /* adjust vm size first */ if (amdgpu_vm_size != -1) { vm_size = amdgpu_vm_size; @@ -2407,6 +2424,9 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1; adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp); switch (adev->vm_manager.num_level) { + case 4: + adev->vm_manager.root_level = AMDGPU_VM_PDB3; + break; case 3: adev->vm_manager.root_level = AMDGPU_VM_PDB2; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 15d757c016cb..139642eacdd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -129,6 +129,7 @@ struct amdgpu_bo_vm; AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype)) #define AMDGPU_PTE_DCC (1ULL << 58) +#define AMDGPU_PTE_BUS_ATOMICS (1ULL << 59) #define AMDGPU_PTE_IS_PTE (1ULL << 63) /* PDE Block Fragment Size for gfx v12 */ @@ -185,9 +186,10 @@ struct amdgpu_bo_vm; #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) /* VMPT level enumerate, and the hiberachy is: - * PDB2->PDB1->PDB0->PTB + * PDB3->PDB2->PDB1->PDB0->PTB */ enum amdgpu_vm_level { + AMDGPU_VM_PDB3, AMDGPU_VM_PDB2, AMDGPU_VM_PDB1, AMDGPU_VM_PDB0, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index f794fb1cc06e..31a437ce9570 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -51,6 +51,7 @@ static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev, unsigned int level) { switch (level) { + case AMDGPU_VM_PDB3: case AMDGPU_VM_PDB2: case AMDGPU_VM_PDB1: case AMDGPU_VM_PDB0: @@ -366,6 +367,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *ancestor = &vmbo->bo; unsigned int entries; struct amdgpu_bo *bo = &vmbo->bo; + uint64_t value = 0, flags = 0; uint64_t addr; int r, idx; @@ -403,7 +405,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, addr = 0; - uint64_t value = 0, flags = 0; if (adev->asic_type >= CHIP_VEGA10) { if (level != AMDGPU_VM_PTB) { /* Handle leaf PDEs as PTEs */ @@ -412,7 +413,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, &value, &flags); } else { /* Workaround for fault priority problem on GMC9 */ - flags = AMDGPU_PTE_EXECUTABLE; + flags = AMDGPU_PTE_EXECUTABLE | adev->gmc.init_pte_flags; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index aa78c2ee9e21..fd881388d612 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -790,7 +790,7 @@ static int vpe_ring_test_ring(struct amdgpu_ring *ring) ret = amdgpu_ring_alloc(ring, 4); if (ret) { - dev_err(adev->dev, "amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, ret); + dev_err(adev->dev, "dma failed to lock ring %d (%d).\n", ring->idx, ret); goto out; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index 1083db8cea2e..73250ab45f20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -540,6 +540,7 @@ static void amdgpu_set_xcp_id(struct amdgpu_device *adev, case AMDGPU_HW_IP_GFX: case AMDGPU_RING_TYPE_COMPUTE: case AMDGPU_RING_TYPE_KIQ: + case AMDGPU_RING_TYPE_MES: ip_blk = AMDGPU_XCP_GFX; break; case AMDGPU_RING_TYPE_SDMA: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index 1928d9e224fc..8058e8f35d41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -41,6 +41,9 @@ #define AMDGPU_XCP_OPS_KFD (1 << 0) +#define XCP_INST_MASK(num_inst, xcp_id) \ + (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0) + struct amdgpu_fpriv; enum AMDGPU_XCP_IP_BLOCK { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 3cdb1e0eca37..cffb2f805de2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -201,7 +201,8 @@ union amd_sriov_ras_caps { uint64_t block_mpio : 1; uint64_t block_mmsch : 1; uint64_t poison_propogation_mode : 1; - uint64_t reserved : 43; + uint64_t uniras_supported : 1; + uint64_t reserved : 42; } bits; uint64_t all; }; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index f9e2edf5260b..d9842aa25283 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -31,9 +31,6 @@ #include "sdma_v4_4_2.h" #include "amdgpu_ip.h" -#define XCP_INST_MASK(num_inst, xcp_id) \ - (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0) - void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) { int i; diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 7a063e44d429..371ee82a8912 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1463,7 +1463,7 @@ static void atom_get_vbios_pn(struct atom_context *ctx) ctx->vbios_pn[count] = 0; } - pr_info("ATOM BIOS: %s\n", ctx->vbios_pn); + drm_info(ctx->card->dev, "ATOM BIOS: %s\n", ctx->vbios_pn); } static void atom_get_vbios_version(struct atom_context *ctx) diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index a51f3414b65d..34644cab6cff 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -218,7 +218,7 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode bd->props.power = BACKLIGHT_POWER_ON; backlight_update_status(bd); - DRM_INFO("amdgpu atom DIG backlight initialized\n"); + drm_info(adev_to_drm(adev), "ATOM DIG backlight initialized\n"); return; @@ -256,7 +256,7 @@ amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder) backlight_device_unregister(bd); kfree(pdata); - DRM_INFO("amdgpu atom LVDS backlight unloaded\n"); + drm_info(adev_to_drm(adev), "ATOM LVDS backlight unloaded\n"); } } @@ -1724,7 +1724,7 @@ amdgpu_atombios_encoder_dac_detect(struct drm_encoder *encoder, uint32_t bios_0_scratch; if (!amdgpu_atombios_encoder_dac_load_detect(encoder, connector)) { - DRM_DEBUG_KMS("detect returned false \n"); + DRM_DEBUG_KMS("detect returned false\n"); return connector_status_unknown; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 9cd63b4177bf..c081784a19c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1552,16 +1552,16 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { if (current_data_rate == 2) { - DRM_INFO("PCIE gen 3 link speeds already enabled\n"); + drm_info(adev_to_drm(adev), "PCIE gen 3 link speeds already enabled\n"); return; } - DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n"); + drm_info(adev_to_drm(adev), "enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n"); } else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) { if (current_data_rate == 1) { - DRM_INFO("PCIE gen 2 link speeds already enabled\n"); + drm_info(adev_to_drm(adev), "PCIE gen 2 link speeds already enabled\n"); return; } - DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n"); + drm_info(adev_to_drm(adev), "enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n"); } if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev)) @@ -1957,10 +1957,6 @@ static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev) return (nak_r + nak_g); } -static void cik_pre_asic_init(struct amdgpu_device *adev) -{ -} - static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, @@ -1981,7 +1977,6 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .need_reset_on_init = &cik_need_reset_on_init, .get_pcie_replay_count = &cik_get_pcie_replay_count, .supports_baco = &cik_asic_supports_baco, - .pre_asic_init = &cik_pre_asic_init, .query_video_codecs = &cik_query_video_codecs, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d75b9940f248..41bbedb8e157 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4041,8 +4041,8 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + drm_err(adev_to_drm(adev), "cp failed to lock ring %d (%d).\n", + ring->idx, r); return r; } @@ -4090,7 +4090,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r); goto err1; } @@ -4170,7 +4170,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) } if (!adev->gfx.cp_fw_write_wait) - DRM_WARN_ONCE("CP firmware version too old, please update!"); + drm_warn_once(adev_to_drm(adev), "CP firmware version too old, please update!"); } static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) @@ -4575,6 +4575,7 @@ static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = { .select_me_pipe_q = &gfx_v10_0_select_me_pipe_q, .init_spm_golden = &gfx_v10_0_init_spm_golden_registers, .update_perfmon_mgcg = &gfx_v10_0_update_perfmon_mgcg, + .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask, }; static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) @@ -6378,7 +6379,7 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev) ring = &adev->gfx.gfx_ring[0]; r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r); return r; } @@ -6428,7 +6429,7 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev) ring = &adev->gfx.gfx_ring[1]; r = amdgpu_ring_alloc(ring, 2); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r); return r; } @@ -8318,7 +8319,8 @@ static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, } } -static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) +static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id, + struct amdgpu_ring *ring, unsigned int vmid) { amdgpu_gfx_off_ctrl(adev, false); @@ -8613,25 +8615,13 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - switch (ring->me) { - case 1: - ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; - break; - case 2: - ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; - break; - default: - return; - } - reg_mem_engine = 0; - } else { - ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; - reg_mem_engine = 1; /* pfp */ + if (!adev->gfx.funcs->get_hdp_flush_mask) { + dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__); + return; } + adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, ®_mem_engine); gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, adev->nbio.funcs->get_hdp_flush_req_offset(adev), adev->nbio.funcs->get_hdp_flush_done_offset(adev), @@ -9395,7 +9385,7 @@ static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - DRM_ERROR("Illegal opcode in command stream \n"); + DRM_ERROR("Illegal opcode in command stream\n"); gfx_v10_0_handle_priv_fault(adev, entry); return 0; } @@ -10124,7 +10114,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, if (!adev || !cu_info) return -EINVAL; - amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); + amdgpu_gfx_parse_disable_cu(adev, disable_masks, 4, 2); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8a2ee2de390f..3a4ca104b161 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -120,6 +120,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_4_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_4_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_4_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_4_rlc.bin"); static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), @@ -416,7 +420,8 @@ static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { - amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); + amdgpu_mes_unmap_legacy_queue(adev, ring, action, + gpu_addr, seq, 0); return; } @@ -566,8 +571,8 @@ static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring) WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 5); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + drm_err(adev_to_drm(adev), "cp failed to lock ring %d (%d).\n", + ring->idx, r); return r; } @@ -623,7 +628,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r); goto err1; } @@ -917,7 +922,7 @@ static int gfx_v11_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf); return 0; } @@ -1052,10 +1057,14 @@ static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, struct amdgpu_gfx_shadow_info *shadow_info) { + /* for gfx */ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; shadow_info->csa_size = MQD_FWWORKAREA_SIZE; shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; + /* for compute */ + shadow_info->eop_size = GFX11_MEC_HPD_SIZE; + shadow_info->eop_alignment = 256; } static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, @@ -1080,6 +1089,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info, + .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask, }; static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) @@ -1107,6 +1117,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1589,6 +1600,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 2; @@ -3046,7 +3058,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else @@ -3617,7 +3630,7 @@ static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) ring = &adev->gfx.gfx_ring[0]; r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev)); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + drm_err(&adev->ddev, "cp failed to lock ring (%d).\n", r); return r; } @@ -3662,7 +3675,7 @@ static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) ring = &adev->gfx.gfx_ring[1]; r = amdgpu_ring_alloc(ring, 2); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r); return r; } @@ -4593,7 +4606,7 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) } if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) - r = amdgpu_mes_kiq_hw_init(adev); + r = amdgpu_mes_kiq_hw_init(adev, 0); else r = gfx_v11_0_kiq_resume(adev); if (r) @@ -4783,7 +4796,7 @@ static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) adev->gfx.is_poweron = true; if(get_gb_addr_config(adev)) - DRM_WARN("Invalid gb_addr_config !\n"); + drm_warn(adev_to_drm(adev), "Invalid gb_addr_config !\n"); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && adev->gfx.rs64_enable) @@ -4901,7 +4914,7 @@ static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_gfx_disable_kcq(adev, 0)) DRM_ERROR("KCQ disable failed\n"); - amdgpu_mes_kiq_hw_fini(adev); + amdgpu_mes_kiq_hw_fini(adev, 0); } if (amdgpu_sriov_vf(adev)) @@ -5568,7 +5581,8 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) +static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id, + struct amdgpu_ring *ring, unsigned vmid) { u32 reg, pre_data, data; @@ -5633,6 +5647,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5671,6 +5686,7 @@ static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): if (!enable) amdgpu_gfx_off_ctrl(adev, false); @@ -5705,6 +5721,7 @@ static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -5831,25 +5848,13 @@ static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - switch (ring->me) { - case 1: - ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; - break; - case 2: - ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; - break; - default: - return; - } - reg_mem_engine = 0; - } else { - ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; - reg_mem_engine = 1; /* pfp */ + if (!adev->gfx.funcs->get_hdp_flush_mask) { + dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__); + return; } + adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, ®_mem_engine); gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, adev->nbio.funcs->get_hdp_flush_req_offset(adev), adev->nbio.funcs->get_hdp_flush_done_offset(adev), @@ -6664,7 +6669,7 @@ static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - DRM_ERROR("Illegal opcode in command stream \n"); + DRM_ERROR("Illegal opcode in command stream\n"); gfx_v11_0_handle_priv_fault(adev, entry); return 0; } @@ -6827,7 +6832,7 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, amdgpu_ring_reset_helper_begin(ring, timedout_fence); - r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false, 0); if (r) { dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); @@ -6842,7 +6847,7 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, return r; } - r = amdgpu_mes_map_legacy_queue(adev, ring); + r = amdgpu_mes_map_legacy_queue(adev, ring, 0); if (r) { dev_err(adev->dev, "failed to remap kgq\n"); return r; @@ -6990,7 +6995,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, amdgpu_ring_reset_helper_begin(ring, timedout_fence); - r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0); if (r) { dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); r = gfx_v11_0_reset_compute_pipe(ring); @@ -7003,7 +7008,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, dev_err(adev->dev, "fail to init kcq\n"); return r; } - r = amdgpu_mes_map_legacy_queue(adev, ring); + r = amdgpu_mes_map_legacy_queue(adev, ring, 0); if (r) { dev_err(adev->dev, "failed to remap kcq\n"); return r; @@ -7477,7 +7482,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, if (!adev || !cu_info) return -EINVAL; - amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2); + amdgpu_gfx_parse_disable_cu(adev, disable_masks, 8, 2); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index d01d2712cf57..6cd16f016c37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -356,7 +356,8 @@ static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { - amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); + amdgpu_mes_unmap_legacy_queue(adev, ring, action, + gpu_addr, seq, 0); return; } @@ -459,8 +460,8 @@ static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring) WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 5); if (r) { - dev_err(adev->dev, - "amdgpu: cp failed to lock ring %d (%d).\n", + drm_err(adev_to_drm(adev), + "cp failed to lock ring %d (%d).\n", ring->idx, r); return r; } @@ -517,7 +518,7 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { - dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); + drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r); goto err1; } @@ -761,7 +762,7 @@ static int gfx_v12_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf); return 0; } @@ -909,10 +910,14 @@ static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev, static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, struct amdgpu_gfx_shadow_info *shadow_info) { + /* for gfx */ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; shadow_info->csa_size = MQD_FWWORKAREA_SIZE; shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; + /* for compute */ + shadow_info->eop_size = GFX12_MEC_HPD_SIZE; + shadow_info->eop_alignment = 256; } static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev, @@ -937,6 +942,7 @@ static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = { .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q, .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk, .get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info, + .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask, }; static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev) @@ -3470,7 +3476,7 @@ static int gfx_v12_0_cp_resume(struct amdgpu_device *adev) } if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) - r = amdgpu_mes_kiq_hw_init(adev); + r = amdgpu_mes_kiq_hw_init(adev, 0); else r = gfx_v12_0_kiq_resume(adev); if (r) @@ -3651,7 +3657,7 @@ static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block) adev->gfx.is_poweron = true; if (get_gb_addr_config(adev)) - DRM_WARN("Invalid gb_addr_config !\n"); + drm_warn(adev_to_drm(adev), "Invalid gb_addr_config !\n"); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) gfx_v12_0_config_gfx_rs64(adev); @@ -3759,7 +3765,7 @@ static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_gfx_disable_kcq(adev, 0)) DRM_ERROR("KCQ disable failed\n"); - amdgpu_mes_kiq_hw_fini(adev); + amdgpu_mes_kiq_hw_fini(adev, 0); } if (amdgpu_sriov_vf(adev)) { @@ -3956,6 +3962,7 @@ static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev, } static void gfx_v12_0_update_spm_vmid(struct amdgpu_device *adev, + int xcc_id, struct amdgpu_ring *ring, unsigned vmid) { @@ -4387,25 +4394,13 @@ static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - switch (ring->me) { - case 1: - ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; - break; - case 2: - ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; - break; - default: - return; - } - reg_mem_engine = 0; - } else { - ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; - reg_mem_engine = 1; /* pfp */ + if (!adev->gfx.funcs->get_hdp_flush_mask) { + dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__); + return; } + adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, ®_mem_engine); gfx_v12_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, adev->nbio.funcs->get_hdp_flush_req_offset(adev), adev->nbio.funcs->get_hdp_flush_done_offset(adev), @@ -5051,7 +5046,7 @@ static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - DRM_ERROR("Illegal opcode in command stream \n"); + DRM_ERROR("Illegal opcode in command stream\n"); gfx_v12_0_handle_priv_fault(adev, entry); return 0; } @@ -5312,7 +5307,7 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, amdgpu_ring_reset_helper_begin(ring, timedout_fence); - r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false, 0); if (r) { dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); r = gfx_v12_reset_gfx_pipe(ring); @@ -5326,7 +5321,7 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, return r; } - r = amdgpu_mes_map_legacy_queue(adev, ring); + r = amdgpu_mes_map_legacy_queue(adev, ring, 0); if (r) { dev_err(adev->dev, "failed to remap kgq\n"); return r; @@ -5427,7 +5422,7 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, amdgpu_ring_reset_helper_begin(ring, timedout_fence); - r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0); if (r) { dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); r = gfx_v12_0_reset_compute_pipe(ring); @@ -5440,7 +5435,7 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, dev_err(adev->dev, "failed to init kcq\n"); return r; } - r = amdgpu_mes_map_legacy_queue(adev, ring); + r = amdgpu_mes_map_legacy_queue(adev, ring, 0); if (r) { dev_err(adev->dev, "failed to remap kcq\n"); return r; @@ -5733,7 +5728,7 @@ static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev, if (!adev || !cu_info) return -EINVAL; - amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2); + amdgpu_gfx_parse_disable_cu(adev, disable_masks, 8, 2); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c new file mode 100644 index 000000000000..86cc90a66296 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c @@ -0,0 +1,4067 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/pci.h> +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "amdgpu_psp.h" +#include "amdgpu_smu.h" +#include "amdgpu_atomfirmware.h" +#include "imu_v12_1.h" +#include "soc_v1_0.h" +#include "gfx_v12_1_pkt.h" + +#include "gc/gc_12_1_0_offset.h" +#include "gc/gc_12_1_0_sh_mask.h" +#include "soc24_enum.h" +#include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h" + +#include "soc15.h" +#include "clearstate_gfx12.h" +#include "v12_structs.h" +#include "gfx_v12_1.h" +#include "mes_v12_1.h" + +#define GFX12_MEC_HPD_SIZE 2048 +#define NUM_SIMD_PER_CU_GFX12_1 4 + +#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L + +#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000000 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 +#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 +#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0ae06301 +#define regCP_HQD_IB_CONTROL_DEFAULT 0x00100000 + +MODULE_FIRMWARE("amdgpu/gc_12_1_0_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_1_0_rlc.bin"); + +#define SH_MEM_ALIGNMENT_MODE_UNALIGNED_GFX12_1_0 0x00000001 +#define DEFAULT_SH_MEM_CONFIG \ + ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ + (SH_MEM_ALIGNMENT_MODE_UNALIGNED_GFX12_1_0 << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ + (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) + +static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v12_1_set_ring_funcs(struct amdgpu_device *adev); +static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev); +static void gfx_v12_1_set_rlc_funcs(struct amdgpu_device *adev); +static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev); +static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev); +static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info); +static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev); +static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance, int xcc_id); +static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val); +static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); +static void gfx_v12_1_ring_invalidate_tlbs(struct amdgpu_ring *ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint8_t dst_sel); +static void gfx_v12_1_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v12_1_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v12_1_update_perf_clk(struct amdgpu_device *adev, + bool enable); +static void gfx_v12_1_xcc_update_perf_clk(struct amdgpu_device *adev, + bool enable, int xcc_id); +static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev); + +static void gfx_v12_1_kiq_set_resources(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); +} + +static void gfx_v12_1_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = ring->wptr_gpu_addr; + uint32_t me = 0, eng_sel = 0; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_COMPUTE: + me = 1; + eng_sel = 0; + break; + case AMDGPU_RING_TYPE_MES: + me = 2; + eng_sel = 5; + break; + default: + WARN_ON(1); + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((me)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx_v12_1_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { + amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, + seq, kiq_ring->xcc_id); + return; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx_v12_1_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static void gfx_v12_1_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, + uint32_t flush_type, + bool all_hub) +{ + gfx_v12_1_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1); +} + +static const struct kiq_pm4_funcs gfx_v12_1_kiq_pm4_funcs = { + .kiq_set_resources = gfx_v12_1_kiq_set_resources, + .kiq_map_queues = gfx_v12_1_kiq_map_queues, + .kiq_unmap_queues = gfx_v12_1_kiq_unmap_queues, + .kiq_query_status = gfx_v12_1_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v12_1_kiq_invalidate_tlbs, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, + .invalidate_tlbs_size = 2, +}; + +static void gfx_v12_1_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i =0; i < num_xcc; i++) + adev->gfx.kiq[i].pmf = &gfx_v12_1_kiq_pm4_funcs; +} + +static void gfx_v12_1_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, + uint32_t mask, uint32_t inv) +{ + if (mem_space == 0) { + addr0 = soc_v1_0_normalize_xcc_reg_offset(addr0); + addr1 = soc_v1_0_normalize_xcc_reg_offset(addr1); + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static int gfx_v12_1_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t scratch_reg0_offset, xcc_offset; + uint32_t tmp = 0; + unsigned i; + int r; + + /* Use register offset which is local to XCC in the packet */ + xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); + scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0); + WREG32(scratch_reg0_offset, 0xCAFEDEAD); + tmp = RREG32(scratch_reg0_offset); + + r = amdgpu_ring_alloc(ring, 5); + if (r) { + dev_err(adev->dev, + "amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) { + gfx_v12_1_ring_emit_wreg(ring, xcc_offset, 0xDEADBEEF); + } else { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(ring, xcc_offset - + PACKET3_SET_UCONFIG_REG_START); + amdgpu_ring_write(ring, 0xDEADBEEF); + } + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch_reg0_offset); + if (tmp == 0xDEADBEEF) + break; + if (amdgpu_emu_mode == 1) + msleep(1); + else + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; +} + +static int gfx_v12_1_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + unsigned index; + uint64_t gpu_addr; + volatile uint32_t *cpu_ptr; + long r; + + /* MES KIQ fw hasn't indirect buffer support for now */ + if (adev->enable_mes_kiq && + ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + return 0; + + memset(&ib, 0, sizeof(ib)); + + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; + + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; + + r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); + goto err1; + } + + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); + ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; + ib.ptr[2] = lower_32_bits(gpu_addr); + ib.ptr[3] = upper_32_bits(gpu_addr); + ib.ptr[4] = 0xDEADBEEF; + ib.length_dw = 5; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err2; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + r = -ETIMEDOUT; + goto err2; + } else if (r < 0) { + goto err2; + } + + if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF) + r = 0; + else + r = -EINVAL; +err2: + amdgpu_ib_free(&ib, NULL); + dma_fence_put(f); +err1: + amdgpu_device_wb_free(adev, index); + return r; +} + +static void gfx_v12_1_free_microcode(struct amdgpu_device *adev) +{ + amdgpu_ucode_release(&adev->gfx.rlc_fw); + amdgpu_ucode_release(&adev->gfx.mec_fw); + + kfree(adev->gfx.rlc.register_list_format); +} + +static int gfx_v12_1_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix) +{ + const struct psp_firmware_header_v1_0 *toc_hdr; + int err = 0; + + err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_toc.bin", ucode_prefix); + if (err) + goto out; + + toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; + adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); + adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); + adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); + adev->psp.toc.start_addr = (uint8_t *)toc_hdr + + le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); + return 0; +out: + amdgpu_ucode_release(&adev->psp.toc_fw); + return err; +} + +static int gfx_v12_1_init_microcode(struct amdgpu_device *adev) +{ + char ucode_prefix[15]; + int err; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + uint16_t version_major; + uint16_t version_minor; + + DRM_DEBUG("\n"); + + amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); + + if (!amdgpu_sriov_vf(adev)) { + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc.bin", ucode_prefix); + if (err) + goto out; + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); + if (err) + goto out; + } + + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mec.bin", ucode_prefix); + if (err) + goto out; + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) + err = gfx_v12_1_init_toc_microcode(adev, ucode_prefix); + + /* only one MEC for gfx 12 */ + adev->gfx.mec2_fw = NULL; + + if (adev->gfx.imu.funcs) { + if (adev->gfx.imu.funcs->init_microcode) { + err = adev->gfx.imu.funcs->init_microcode(adev); + if (err) + dev_err(adev->dev, "Failed to load imu firmware!\n"); + } + } + +out: + if (err) { + amdgpu_ucode_release(&adev->gfx.rlc_fw); + amdgpu_ucode_release(&adev->gfx.mec_fw); + } + + return err; +} + +static u32 gfx_v12_1_get_csb_size(struct amdgpu_device *adev) +{ + u32 count = 0; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + count += 1; + + for (sect = gfx12_cs_data; sect->section != NULL; ++sect) { + if (sect->id == SECT_CONTEXT) { + for (ext = sect->section; ext->extent != NULL; ++ext) + count += 2 + ext->reg_count; + } else + return 0; + } + + return count; +} + +static void gfx_v12_1_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) +{ + u32 count = 0, clustercount = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (adev->gfx.rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + count += 1; + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + if (sect->id == SECT_CONTEXT) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + clustercount++; + buffer[count++] = ext->reg_count; + buffer[count++] = ext->reg_index; + + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } + } else + return; + } + + buffer[0] = clustercount; +} + +static void gfx_v12_1_rlc_fini(struct amdgpu_device *adev) +{ + /* clear state block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + + /* jump table block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); +} + +static void gfx_v12_1_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) +{ + int xcc_id, num_xcc; + struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[GET_INST(GC, xcc_id)]; + + reg_access_ctrl->grbm_cntl = + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_CNTL); + reg_access_ctrl->grbm_idx = + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX); + + reg_access_ctrl->vfi_cmd = + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_CMD); + reg_access_ctrl->vfi_stat = + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_STAT); + reg_access_ctrl->vfi_addr = + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_ADDR); + reg_access_ctrl->vfi_data = + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_DATA); + reg_access_ctrl->vfi_grbm_cntl = + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_GRBM_GFX_CNTL); + reg_access_ctrl->vfi_grbm_idx = + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_GRBM_GFX_INDEX); + reg_access_ctrl->vfi_grbm_cntl_data = 0; + reg_access_ctrl->vfi_grbm_idx_data = 0; + } + adev->gfx.rlc.rlcg_reg_access_supported = true; +} + +static int gfx_v12_1_rlc_init(struct amdgpu_device *adev) +{ + const struct cs_section_def *cs_data; + int r, i, num_xcc; + + adev->gfx.rlc.cs_data = gfx12_cs_data; + + cs_data = adev->gfx.rlc.cs_data; + + if (cs_data) { + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) + return r; + } + + /* init spm vmid with 0xf */ + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + if (adev->gfx.rlc.funcs->update_spm_vmid) + adev->gfx.rlc.funcs->update_spm_vmid(adev, i, NULL, 0xf); + } + + return 0; +} + +static void gfx_v12_1_mec_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL); +} + +static int gfx_v12_1_mec_init(struct amdgpu_device *adev) +{ + int r, i, num_xcc; + u32 *hpd; + size_t mec_hpd_size; + + bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) + bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap, + AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + mec_hpd_size = adev->gfx.num_compute_rings * + GFX12_MEC_HPD_SIZE * num_xcc; + + if (mec_hpd_size) { + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + gfx_v12_1_mec_fini(adev); + return r; + } + + memset(hpd, 0, mec_hpd_size); + + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + } + + return 0; +} + +static uint32_t wave_read_ind(struct amdgpu_device *adev, + uint32_t xcc_id, uint32_t wave, + uint32_t address) +{ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (address << SQ_IND_INDEX__INDEX__SHIFT)); + return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA); +} + +static void wave_read_regs(struct amdgpu_device *adev, + uint32_t xcc_id, uint32_t wave, + uint32_t thread, uint32_t regno, + uint32_t num, uint32_t *out) +{ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (regno << SQ_IND_INDEX__INDEX__SHIFT) | + (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | + (SQ_IND_INDEX__AUTO_INCR_MASK)); + while (num--) + *(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA); +} + +static void gfx_v12_1_read_wave_data(struct amdgpu_device *adev, + uint32_t xcc_id, + uint32_t simd, uint32_t wave, + uint32_t *dst, int *no_fields) +{ + /* in gfx12 the SIMD_ID is specified as part of the INSTANCE + * field when performing a select_se_sh so it should be + * zero here */ + WARN_ON(simd != 0); + + /* type 4 wave data */ + dst[(*no_fields)++] = 4; + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_HW_ID1); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_HW_ID2); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_STS2); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_DBG1); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_MODE); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_STATE_PRIV); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXCP_FLAG_PRIV); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXCP_FLAG_USER); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_TRAP_CTRL); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_ACTIVE); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_VALID_AND_IDLE); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_DVGPR_ALLOC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_DVGPR_ALLOC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_SCHED_MODE); +} + +static void gfx_v12_1_read_wave_sgprs(struct amdgpu_device *adev, + uint32_t xcc_id, uint32_t simd, + uint32_t wave, uint32_t start, + uint32_t size, uint32_t *dst) +{ + WARN_ON(simd != 0); + + wave_read_regs(adev, xcc_id, wave, 0, + start + SQIND_WAVE_SGPRS_OFFSET, + size, dst); +} + +static void gfx_v12_1_read_wave_vgprs(struct amdgpu_device *adev, + uint32_t xcc_id, uint32_t simd, + uint32_t wave, uint32_t thread, + uint32_t start, uint32_t size, + uint32_t *dst) +{ + wave_read_regs(adev, xcc_id, wave, thread, + start + SQIND_WAVE_VGPRS_OFFSET, + size, dst); +} + +static void gfx_v12_1_select_me_pipe_q(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) +{ + soc_v1_0_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id)); +} + +static int gfx_v12_1_get_xccs_per_xcp(struct amdgpu_device *adev) +{ + /* Fill this in when the interface is ready */ + return 1; +} + +static int gfx_v12_1_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) +{ + int logic_xcc; + int xcc = (ih_node & 0x7) - 2 + (ih_node >> 3) * 4; + + for (logic_xcc = 0; logic_xcc < NUM_XCC(adev->gfx.xcc_mask); logic_xcc++) { + if (xcc == GET_INST(GC, logic_xcc)) + return logic_xcc; + } + + dev_err(adev->dev, "Couldn't find xcc mapping from IH node"); + return -EINVAL; +} + +static const struct amdgpu_gfx_funcs gfx_v12_1_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v12_1_get_gpu_clock_counter, + .select_se_sh = &gfx_v12_1_xcc_select_se_sh, + .read_wave_data = &gfx_v12_1_read_wave_data, + .read_wave_sgprs = &gfx_v12_1_read_wave_sgprs, + .read_wave_vgprs = &gfx_v12_1_read_wave_vgprs, + .select_me_pipe_q = &gfx_v12_1_select_me_pipe_q, + .update_perfmon_mgcg = &gfx_v12_1_update_perf_clk, + .get_xccs_per_xcp = &gfx_v12_1_get_xccs_per_xcp, + .ih_node_to_logical_xcc = &gfx_v12_1_ih_to_xcc_inst, +}; + +static int gfx_v12_1_gpu_early_init(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 1, 0): + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + break; + default: + BUG(); + break; + } + + return 0; +} + +static int gfx_v12_1_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int xcc_id, int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring; + unsigned int hw_prio; + uint32_t xcc_doorbell_start; + + ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings + + ring_id]; + + /* mec0 is me1 */ + ring->xcc_id = xcc_id; + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + xcc_doorbell_start = adev->doorbell_index.mec_ring0 + + xcc_id * adev->doorbell_index.xcc_doorbell_range; + ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id + xcc_id * adev->gfx.num_compute_rings) * + GFX12_MEC_HPD_SIZE; + ring->vm_hub = AMDGPU_GFXHUB(xcc_id); + sprintf(ring->name, "comp_%d.%d.%d.%d", + ring->xcc_id, ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? + AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, + hw_prio, NULL); + if (r) + return r; + + return 0; +} + +static struct { + SOC24_FIRMWARE_ID id; + unsigned int offset; + unsigned int size; + unsigned int size_x16; + unsigned int num_inst; +} rlc_autoload_info[SOC24_FIRMWARE_ID_MAX]; + +#define RLC_TOC_OFFSET_DWUNIT 8 +#define RLC_SIZE_MULTIPLE 1024 +#define RLC_TOC_UMF_SIZE_inM 23ULL +#define RLC_TOC_FORMAT_API 165ULL + +#define RLC_NUM_INS_CODE0 1 +#define RLC_NUM_INS_CODE1 8 +#define RLC_NUM_INS_CODE2 2 +#define RLC_NUM_INS_CODE3 16 + +static void gfx_v12_1_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc) +{ + RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc; + + while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) { + rlc_autoload_info[ucode->id].id = ucode->id; + rlc_autoload_info[ucode->id].offset = + ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4; + rlc_autoload_info[ucode->id].size = + ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 : + ucode->size * 4; + switch (ucode->vfflr_image_code) { + case 0: + rlc_autoload_info[ucode->id].num_inst = + RLC_NUM_INS_CODE0; + break; + case 1: + rlc_autoload_info[ucode->id].num_inst = + RLC_NUM_INS_CODE1; + break; + case 2: + rlc_autoload_info[ucode->id].num_inst = + RLC_NUM_INS_CODE2; + break; + case 3: + rlc_autoload_info[ucode->id].num_inst = + RLC_NUM_INS_CODE3; + break; + default: + dev_err(adev->dev, + "Invalid Instance number detected\n"); + break; + } + ucode++; + } +} + +static uint32_t gfx_v12_1_calc_toc_total_size(struct amdgpu_device *adev) +{ + uint32_t total_size = 0; + SOC24_FIRMWARE_ID id; + + gfx_v12_1_parse_rlc_toc(adev, adev->psp.toc.start_addr); + + for (id = SOC24_FIRMWARE_ID_RLC_G_UCODE; id < SOC24_FIRMWARE_ID_MAX; id++) + total_size += rlc_autoload_info[id].size; + + /* In case the offset in rlc toc ucode is aligned */ + if (total_size < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset) + total_size = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset + + rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size; + if (total_size < (RLC_TOC_UMF_SIZE_inM << 20)) + total_size = RLC_TOC_UMF_SIZE_inM << 20; + + return total_size; +} + +static int gfx_v12_1_rlc_autoload_buffer_init(struct amdgpu_device *adev) +{ + int r; + uint32_t total_size; + + total_size = gfx_v12_1_calc_toc_total_size(adev); + + r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.rlc_autoload_bo, + &adev->gfx.rlc.rlc_autoload_gpu_addr, + (void **)&adev->gfx.rlc.rlc_autoload_ptr); + + if (r) { + dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); + return r; + } + + return 0; +} + +static void gfx_v12_1_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, + SOC24_FIRMWARE_ID id, + const void *fw_data, + uint32_t fw_size) +{ + uint32_t toc_offset; + uint32_t toc_fw_size, toc_fw_inst_size; + char *ptr = adev->gfx.rlc.rlc_autoload_ptr; + int i, num_inst; + + if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX) + return; + + toc_offset = rlc_autoload_info[id].offset; + toc_fw_size = rlc_autoload_info[id].size; + num_inst = rlc_autoload_info[id].num_inst; + toc_fw_inst_size = toc_fw_size / num_inst; + + if (fw_size == 0) + fw_size = toc_fw_inst_size; + + if (fw_size > toc_fw_inst_size) + fw_size = toc_fw_inst_size; + + for (i = 0; i < num_inst; i++) { + if ((num_inst == RLC_NUM_INS_CODE0) || + ((1 << (i / 2)) & adev->gfx.xcc_mask)) { + memcpy(ptr + toc_offset + i * toc_fw_inst_size, fw_data, fw_size); + + if (fw_size < toc_fw_inst_size) + memset(ptr + toc_offset + fw_size + i * toc_fw_inst_size, + 0, toc_fw_inst_size - fw_size); + } + } +} + +static void +gfx_v12_1_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev) +{ + void *data; + uint32_t size; + uint32_t *toc_ptr; + + data = adev->psp.toc.start_addr; + size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size; + + toc_ptr = (uint32_t *)data + size / 4 - 2; + *toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1; + + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC, + data, size); +} + +static void +gfx_v12_1_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + uint32_t fw_size; + const struct gfx_firmware_header_v2_0 *cpv2_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + const struct rlc_firmware_header_v2_1 *rlcv21_hdr; + const struct rlc_firmware_header_v2_2 *rlcv22_hdr; + uint16_t version_major, version_minor; + + /* mec ucode */ + cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.mec_fw->data; + /* instruction */ + fw_data = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); + fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC, + fw_data, fw_size); + /* data */ + fw_data = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(cpv2_hdr->data_offset_bytes)); + fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK, + fw_data, fw_size); + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK, + fw_data, fw_size); + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK, + fw_data, fw_size); + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK, + fw_data, fw_size); + + /* rlc ucode */ + rlc_hdr = (const struct rlc_firmware_header_v2_0 *) + adev->gfx.rlc_fw->data; + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE, + fw_data, fw_size); + + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2) { + if (version_minor >= 1) { + rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes)); + fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes); + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH, + fw_data, fw_size); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes)); + fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes); + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM, + fw_data, fw_size); + } + if (version_minor >= 2) { + rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes)); + fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes); + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE, + fw_data, fw_size); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes)); + fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes); + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT, + fw_data, fw_size); + } + } +} + +static void +gfx_v12_1_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + uint32_t fw_size; + const struct sdma_firmware_header_v3_0 *sdma_hdr; + + if (adev->sdma.instance[0].fw) { + sdma_hdr = (const struct sdma_firmware_header_v3_0 *) + adev->sdma.instance[0].fw->data; + fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + + le32_to_cpu(sdma_hdr->ucode_offset_bytes)); + fw_size = le32_to_cpu(sdma_hdr->ucode_size_bytes); + + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0, + fw_data, fw_size); + } +} + +static void +gfx_v12_1_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + unsigned fw_size; + const struct mes_firmware_header_v1_0 *mes_hdr; + int pipe, ucode_id, data_id; + + for (pipe = 0; pipe < 2; pipe++) { + if (pipe == 0) { + ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P0; + data_id = SOC24_FIRMWARE_ID_RS64_MES_P0_STACK; + } else { + ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P1; + data_id = SOC24_FIRMWARE_ID_RS64_MES_P1_STACK; + } + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw[pipe]->data; + + fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + + le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); + + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, ucode_id, fw_data, fw_size); + + fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + + le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); + + gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, data_id, fw_data, fw_size); + } +} + +static int gfx_v12_1_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) +{ + uint32_t rlc_g_offset, rlc_g_size; + uint64_t gpu_addr; + uint32_t data; + int i, num_xcc; + + /* RLC autoload sequence 2: copy ucode */ + gfx_v12_1_rlc_backdoor_autoload_copy_sdma_ucode(adev); + gfx_v12_1_rlc_backdoor_autoload_copy_gfx_ucode(adev); + gfx_v12_1_rlc_backdoor_autoload_copy_mes_ucode(adev); + gfx_v12_1_rlc_backdoor_autoload_copy_toc_ucode(adev); + + rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset; + rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size; + gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + WREG32_SOC15(GC, GET_INST(GC, i), + regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, + upper_32_bits(gpu_addr)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, + lower_32_bits(gpu_addr)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGFX_IMU_RLC_BOOTLOADER_SIZE, + rlc_g_size); + } + + if (adev->gfx.imu.funcs) { + /* RLC autoload sequence 3: load IMU fw */ + if (adev->gfx.imu.funcs->load_microcode) + adev->gfx.imu.funcs->load_microcode(adev); + } + + /* unhalt rlc to start autoload */ + for (i = 0; i < num_xcc; i++) { + data = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_THREAD_ENABLE); + data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1); + data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); + WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_THREAD_ENABLE, data); + WREG32_SOC15(GC, GET_INST(GC, i), regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK); + } + + return 0; +} + +static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block) +{ + int i, j, k, r, ring_id = 0; + unsigned num_compute_rings; + int xcc_id, num_xcc; + struct amdgpu_device *adev = ip_block->adev; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 1, 0): + adev->gfx.mec.num_mec = 1; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + break; + default: + adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_pipe_per_mec = 2; + adev->gfx.mec.num_queue_per_pipe = 4; + break; + } + + /* recalculate compute rings to use based on hardware configuration */ + num_compute_rings = (adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe) / 2; + adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings, + num_compute_rings); + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + /* EOP Event */ + r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP, + GFX_12_1_0__SRCID__CP_EOP_INTERRUPT, + &adev->gfx.eop_irq); + if (r) + return r; + + /* Privileged reg */ + r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP, + GFX_12_1_0__SRCID__CP_PRIV_REG_FAULT, + &adev->gfx.priv_reg_irq); + if (r) + return r; + + /* Privileged inst */ + r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP, + GFX_12_1_0__SRCID__CP_PRIV_INSTR_FAULT, + &adev->gfx.priv_inst_irq); + if (r) + return r; + + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; + + r = gfx_v12_1_rlc_init(adev); + if (r) { + dev_err(adev->dev, "Failed to init rlc BOs!\n"); + return r; + } + + r = gfx_v12_1_mec_init(adev); + if (r) { + dev_err(adev->dev, "Failed to init MEC BOs!\n"); + return r; + } + + /* set up the compute queues - allocate horizontally across pipes */ + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, + xcc_id, i, k, j)) + continue; + + r = gfx_v12_1_compute_ring_init(adev, ring_id, + xcc_id, i, k, j); + if (r) + return r; + + ring_id++; + } + } + } + + if (!adev->enable_mes_kiq) { + r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, xcc_id); + if (r) { + dev_err(adev->dev, "Failed to init KIQ BOs!\n"); + return r; + } + + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); + if (r) + return r; + } + + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_1_compute_mqd), xcc_id); + if (r) + return r; + } + + /* allocate visible FB for rlc auto-loading fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + r = gfx_v12_1_rlc_autoload_buffer_init(adev); + if (r) + return r; + } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + r = gfx_v12_1_init_cp_compute_microcode_bo(adev); + if (r) + return r; + } + + r = gfx_v12_1_gpu_early_init(adev); + if (r) + return r; + + r = amdgpu_gfx_sysfs_init(adev); + if (r) + return r; + + return 0; +} + +static void gfx_v12_1_rlc_autoload_buffer_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, + &adev->gfx.rlc.rlc_autoload_gpu_addr, + (void **)&adev->gfx.rlc.rlc_autoload_ptr); +} + +static int gfx_v12_1_sw_fini(struct amdgpu_ip_block *ip_block) +{ + int i, num_xcc; + struct amdgpu_device *adev = ip_block->adev; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++) + amdgpu_ring_fini(&adev->gfx.compute_ring[i]); + + for (i = 0; i < num_xcc; i++) { + amdgpu_gfx_mqd_sw_fini(adev, i); + + if (!adev->enable_mes_kiq) { + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring); + amdgpu_gfx_kiq_fini(adev, i); + } + } + + gfx_v12_1_rlc_fini(adev); + gfx_v12_1_mec_fini(adev); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) + gfx_v12_1_rlc_autoload_buffer_fini(adev); + + gfx_v12_1_free_microcode(adev); + amdgpu_gfx_sysfs_fini(adev); + + return 0; +} + +static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance, int xcc_id) +{ + u32 data; + + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, + instance); + + if (se_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + + if (sh_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); + + WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data); +} + +static u32 gfx_v12_1_get_sa_active_bitmap(struct amdgpu_device *adev, + int xcc_id) +{ + u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; + + gc_disabled_sa_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SA_UNIT_DISABLE); + gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, + CC_GC_SA_UNIT_DISABLE, + SA_DISABLE); + gc_user_disabled_sa_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SA_UNIT_DISABLE); + gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, + GC_USER_SA_UNIT_DISABLE, + SA_DISABLE); + sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * + adev->gfx.config.max_shader_engines); + + return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); +} + +static u32 gfx_v12_1_get_rb_active_bitmap(struct amdgpu_device *adev, + int xcc_id) +{ + u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; + u32 rb_mask; + + gc_disabled_rb_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regCC_RB_BACKEND_DISABLE); + gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, + CC_RB_BACKEND_DISABLE, + BACKEND_DISABLE); + gc_user_disabled_rb_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regGC_USER_RB_BACKEND_DISABLE); + gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, + GC_USER_RB_BACKEND_DISABLE, + BACKEND_DISABLE); + rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * + adev->gfx.config.max_shader_engines); + + return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); +} + +static void gfx_v12_1_setup_rb(struct amdgpu_device *adev) +{ + u32 rb_bitmap_width_per_sa; + u32 max_sa; + u32 active_sa_bitmap; + u32 global_active_rb_bitmap; + u32 active_rb_bitmap = 0; + u32 i; + int xcc_id; + + for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { + /* query sa bitmap from SA_UNIT_DISABLE registers */ + active_sa_bitmap = gfx_v12_1_get_sa_active_bitmap(adev, xcc_id); + /* query rb bitmap from RB_BACKEND_DISABLE registers */ + global_active_rb_bitmap = gfx_v12_1_get_rb_active_bitmap(adev, xcc_id); + + /* generate active rb bitmap according to active sa bitmap */ + max_sa = adev->gfx.config.max_shader_engines * + adev->gfx.config.max_sh_per_se; + rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se; + for (i = 0; i < max_sa; i++) { + if (active_sa_bitmap & (1 << i)) + active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); + } + + active_rb_bitmap |= global_active_rb_bitmap; + } + + adev->gfx.config.backend_enable_mask = active_rb_bitmap; + adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); +} + +static void gfx_v12_1_xcc_init_compute_vmid(struct amdgpu_device *adev, + int xcc_id) +{ + int i; + uint32_t sh_mem_bases; + uint32_t data; + + /* + * Configure apertures: + * LDS: 0x20000000'00000000 - 0x20000001'00000000 (4GB) + * Scratch: 0x10000000'00000000 - 0x10000001'00000000 (4GB) + */ + sh_mem_bases = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> 58)); + sh_mem_bases = REG_SET_FIELD(sh_mem_bases, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> 48)); + + mutex_lock(&adev->srbm_mutex); + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { + soc_v1_0_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id)); + /* CP and shaders */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases); + + /* Enable trap for each kfd vmid. */ + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL, data); + + /* Disable VGPR deallocation instruction for each KFD vmid. */ + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_DEBUG); + data = REG_SET_FIELD(data, SQ_DEBUG, DISABLE_VGPR_DEALLOC, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_DEBUG, data); + } + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); +} + +static void gfx_v12_1_tcp_harvest(struct amdgpu_device *adev) +{ + /* TODO: harvest feature to be added later. */ +} + +static void gfx_v12_1_get_tcc_info(struct amdgpu_device *adev) +{ +} + +static void gfx_v12_1_xcc_constants_init(struct amdgpu_device *adev, + int xcc_id) +{ + u32 tmp; + int i; + + /* XXX SH_MEM regs */ + /* where to put LDS, scratch, GPUVM in FSA64 space */ + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { + soc_v1_0_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id)); + /* CP and shaders */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); + if (i != 0) { + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> 58)); + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> 48)); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, tmp); + } + } + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + + mutex_unlock(&adev->srbm_mutex); + + gfx_v12_1_xcc_init_compute_vmid(adev, xcc_id); +} + +static void gfx_v12_1_constants_init(struct amdgpu_device *adev) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + gfx_v12_1_setup_rb(adev); + gfx_v12_1_get_cu_info(adev, &adev->gfx.cu_info); + gfx_v12_1_get_tcc_info(adev); + adev->gfx.config.pa_sc_tile_steering_override = 0; + + for (i = 0; i < num_xcc; i++) + gfx_v12_1_xcc_constants_init(adev, i); +} + +static void gfx_v12_1_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + u32 tmp; + + if (amdgpu_sriov_vf(adev)) + return; + + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0); + + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, tmp); +} + +static int gfx_v12_1_xcc_init_csb(struct amdgpu_device *adev, + int xcc_id) +{ + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + + return 0; +} + +static void gfx_v12_1_xcc_rlc_stop(struct amdgpu_device *adev, + int xcc_id) +{ + u32 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CNTL); + + tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CNTL, tmp); +} + +static void gfx_v12_1_rlc_stop(struct amdgpu_device *adev) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) + gfx_v12_1_xcc_rlc_stop(adev, i); +} + +static void gfx_v12_1_xcc_rlc_reset(struct amdgpu_device *adev, + int xcc_id) +{ + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), + GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + udelay(50); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), + GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + udelay(50); +} + +static void gfx_v12_1_rlc_reset(struct amdgpu_device *adev) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) + gfx_v12_1_xcc_rlc_reset(adev, i); +} + +static void gfx_v12_1_xcc_rlc_smu_handshake_cntl(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t rlc_pg_cntl; + + rlc_pg_cntl = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL); + + if (!enable) { + /* RLC_PG_CNTL[23] = 0 (default) + * RLC will wait for handshake acks with SMU + * GFXOFF will be enabled + * RLC_PG_CNTL[23] = 1 + * RLC will not issue any message to SMU + * hence no handshake between SMU & RLC + * GFXOFF will be disabled + */ + rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; + } else + rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL, rlc_pg_cntl); +} + +static void gfx_v12_1_xcc_rlc_start(struct amdgpu_device *adev, + int xcc_id) +{ + /* TODO: enable rlc & smu handshake until smu + * and gfxoff feature works as expected */ + if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) + gfx_v12_1_xcc_rlc_smu_handshake_cntl(adev, false, xcc_id); + + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL, RLC_ENABLE_F32, 1); + udelay(50); +} + +static void gfx_v12_1_rlc_start(struct amdgpu_device *adev) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + gfx_v12_1_xcc_rlc_start(adev, i); + } +} + +static void gfx_v12_1_xcc_rlc_enable_srm(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t tmp; + + /* enable Save Restore Machine */ + tmp = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SRM_CNTL)); + tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; + tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; + WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SRM_CNTL), tmp); +} + +static void gfx_v12_1_xcc_load_rlcg_microcode(struct amdgpu_device *adev, + int xcc_id) +{ + const struct rlc_firmware_header_v2_0 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR, + RLCG_UCODE_LOADING_START_ADDRESS); + + for (i = 0; i < fw_size; i++) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_GPM_UCODE_DATA, + le32_to_cpup(fw_data++)); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_GPM_UCODE_ADDR, + adev->gfx.rlc_fw_version); +} + +static void gfx_v12_1_xcc_load_rlc_iram_dram_microcode(struct amdgpu_device *adev, + int xcc_id) +{ + const struct rlc_firmware_header_v2_2 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + u32 tmp; + + hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); + fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_IRAM_ADDR, 0); + + for (i = 0; i < fw_size; i++) { + if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) + msleep(1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_LX6_IRAM_DATA, + le32_to_cpup(fw_data++)); + } + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); + fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_LX6_DRAM_ADDR, 0); + for (i = 0; i < fw_size; i++) { + if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) + msleep(1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_LX6_DRAM_DATA, + le32_to_cpup(fw_data++)); + } + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); + + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_CNTL); + tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_CNTL, tmp); +} + +static int gfx_v12_1_xcc_rlc_load_microcode(struct amdgpu_device *adev, + int xcc_id) +{ + const struct rlc_firmware_header_v2_0 *hdr; + uint16_t version_major; + uint16_t version_minor; + + if (!adev->gfx.rlc_fw) + return -EINVAL; + + hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(&hdr->header); + + version_major = le16_to_cpu(hdr->header.header_version_major); + version_minor = le16_to_cpu(hdr->header.header_version_minor); + + if (version_major == 2) { + gfx_v12_1_xcc_load_rlcg_microcode(adev, xcc_id); + if (amdgpu_dpm == 1) { + if (version_minor >= 2) + gfx_v12_1_xcc_load_rlc_iram_dram_microcode(adev, xcc_id); + } + + return 0; + } + + return -EINVAL; +} + +static int gfx_v12_1_xcc_rlc_resume(struct amdgpu_device *adev, + int xcc_id) +{ + int r; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + gfx_v12_1_xcc_init_csb(adev, xcc_id); + + if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ + gfx_v12_1_xcc_rlc_enable_srm(adev, xcc_id); + } else { + if (amdgpu_sriov_vf(adev)) { + gfx_v12_1_xcc_init_csb(adev, xcc_id); + return 0; + } + + gfx_v12_1_xcc_rlc_stop(adev, xcc_id); + + /* disable CG */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0); + + /* disable PG */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL, 0); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + /* legacy rlc firmware loading */ + r = gfx_v12_1_xcc_rlc_load_microcode(adev, xcc_id); + if (r) + return r; + } + + gfx_v12_1_xcc_init_csb(adev, xcc_id); + + gfx_v12_1_xcc_rlc_start(adev, xcc_id); + } + + return 0; +} + +static int gfx_v12_1_rlc_resume(struct amdgpu_device *adev) +{ + int r, i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + r = gfx_v12_1_xcc_rlc_resume(adev, i); + if (r) + return r; + } + + return 0; +} + +static void gfx_v12_1_xcc_config_gfx_rs64(struct amdgpu_device *adev, + int xcc_id) +{ + const struct gfx_firmware_header_v2_0 *mec_hdr; + uint32_t pipe_id, tmp; + + mec_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.mec_fw->data; + + /* config mec program start addr */ + for (pipe_id = 0; pipe_id < 4; pipe_id++) { + soc_v1_0_grbm_select(adev, 1, pipe_id, 0, 0, GET_INST(GC, xcc_id)); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START, + mec_hdr->ucode_start_addr_lo >> 2 | + mec_hdr->ucode_start_addr_hi << 30); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START_HI, + mec_hdr->ucode_start_addr_hi >> 2); + } + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + + /* reset mec pipe */ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, tmp); + + /* clear mec pipe reset */ + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, tmp); +} + +static void gfx_v12_1_config_gfx_rs64(struct amdgpu_device *adev) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + for (i = 0; i < num_xcc; i++) + gfx_v12_1_xcc_config_gfx_rs64(adev, i); +} + +static void gfx_v12_1_xcc_set_mec_ucode_start_addr(struct amdgpu_device *adev, + int xcc_id) +{ + const struct gfx_firmware_header_v2_0 *cp_hdr; + unsigned pipe_id; + + cp_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.mec_fw->data; + mutex_lock(&adev->srbm_mutex); + for (pipe_id = 0; pipe_id < adev->gfx.mec.num_pipe_per_mec; pipe_id++) { + soc_v1_0_grbm_select(adev, 1, pipe_id, 0, 0, GET_INST(GC, xcc_id)); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START, + cp_hdr->ucode_start_addr_lo >> 2 | + cp_hdr->ucode_start_addr_hi << 30); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START_HI, + cp_hdr->ucode_start_addr_hi >> 2); + } + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); +} + +static int gfx_v12_1_xcc_wait_for_rlc_autoload_complete(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t cp_status; + uint32_t bootload_status; + int i; + + for (i = 0; i < adev->usec_timeout; i++) { + cp_status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_STAT); + bootload_status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_RLCS_BOOTLOAD_STATUS); + + if ((cp_status == 0) && + (REG_GET_FIELD(bootload_status, + RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { + break; + } + udelay(1); + if (amdgpu_emu_mode) + msleep(10); + } + + if (i >= adev->usec_timeout) { + dev_err(adev->dev, + "rlc autoload: xcc%d gc ucode autoload timeout\n", xcc_id); + return -ETIMEDOUT; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id); + } + + return 0; +} + +static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) +{ + int xcc_id; + + for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) + gfx_v12_1_xcc_wait_for_rlc_autoload_complete(adev, xcc_id); + + return 0; +} + +static void gfx_v12_1_xcc_cp_compute_enable(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + u32 data; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, + enable ? 0 : 1); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, + enable ? 0 : 1); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, + enable ? 0 : 1); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, + enable ? 0 : 1); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, + enable ? 0 : 1); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, + enable ? 1 : 0); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, + enable ? 1 : 0); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, + enable ? 1 : 0); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, + enable ? 1 : 0); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, + enable ? 0 : 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, data); + + adev->gfx.kiq[xcc_id].ring.sched.ready = enable; + + udelay(50); +} + +static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v2_0 *mec_hdr; + const __le32 *fw_ucode, *fw_data; + u32 fw_ucode_size, fw_data_size; + u32 *fw_ucode_ptr, *fw_data_ptr; + int i, r, xcc_id; + + if (!adev->gfx.mec_fw) + return -EINVAL; + + mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); + + fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->ucode_offset_bytes)); + fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); + + fw_data = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->data_offset_bytes)); + fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); + + if (adev->gfx.mec.mec_fw_obj == NULL) { + r = amdgpu_bo_create_reserved(adev, fw_ucode_size, + 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.mec.mec_fw_obj, + &adev->gfx.mec.mec_fw_gpu_addr, + (void **)&fw_ucode_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); + gfx_v12_1_mec_fini(adev); + return r; + } + + memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size); + + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); + } + + if (adev->gfx.mec.mec_fw_data_obj == NULL) { + r = amdgpu_bo_create_reserved(adev, + ALIGN(fw_data_size, 64 * 1024) * + adev->gfx.mec.num_pipe_per_mec * NUM_XCC(adev->gfx.xcc_mask), + 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.mec.mec_fw_data_obj, + &adev->gfx.mec.mec_fw_data_gpu_addr, + (void **)&fw_data_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r); + gfx_v12_1_mec_fini(adev); + return r; + } + + for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { + for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { + u32 offset = (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) * + ALIGN(fw_data_size, 64 * 1024) / 4; + memcpy(fw_data_ptr + offset, fw_data, fw_data_size); + } + } + + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj); + } + + return 0; +} + +static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *adev, + int xcc_id) +{ + const struct gfx_firmware_header_v2_0 *mec_hdr; + u32 fw_data_size; + u32 tmp, i, usec_timeout = 50000; /* Wait for 50 ms */ + + if (!adev->gfx.mec_fw) + return -EINVAL; + + mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; + fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); + + gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id); + + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp); + + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL, tmp); + + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { + soc_v1_0_grbm_select(adev, 1, i, 0, 0, GET_INST(GC, xcc_id)); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_LO, + lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr + + (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) * + ALIGN(fw_data_size, 64 * 1024))); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_HI, + upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr + + (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) * + ALIGN(fw_data_size, 64 * 1024))); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO, + lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI, + upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); + } + mutex_unlock(&adev->srbm_mutex); + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, + INVALIDATE_DCACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate data cache\n"); + return -EINVAL; + } + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id); + + return 0; +} + +static void gfx_v12_1_xcc_kiq_setting(struct amdgpu_ring *ring, + int xcc_id) +{ + uint32_t tmp; + struct amdgpu_device *adev = ring->adev; + + /* tell RLC which is KIQ queue */ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); + tmp |= 0x80; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); +} + +static void gfx_v12_1_xcc_cp_set_doorbell_range(struct amdgpu_device *adev, + int xcc_id) +{ + /* disable gfx engine doorbell range */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_DOORBELL_RANGE_LOWER, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_DOORBELL_RANGE_UPPER, 0); + + /* set compute engine doorbell range */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_LOWER, + ((adev->doorbell_index.kiq + + xcc_id * adev->doorbell_index.xcc_doorbell_range) * + 2) << 2); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_UPPER, + ((adev->doorbell_index.userqueue_end + + xcc_id * adev->doorbell_index.xcc_doorbell_range) * + 2) << 2); +} + +static int gfx_v12_1_compute_mqd_init(struct amdgpu_device *adev, void *m, + struct amdgpu_mqd_prop *prop) +{ + struct v12_1_compute_mqd *mqd = m; + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; + uint32_t tmp; + + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000007; + + eop_base_addr = prop->eop_gpu_addr >> 8; + mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = regCP_HQD_EOP_CONTROL_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1)); + + mqd->cp_hqd_eop_control = tmp; + + /* enable doorbell? */ + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; + + if (prop->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, prop->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } else { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* disable the queue if it's active */ + mqd->cp_hqd_dequeue_request = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); + + /* set MQD vmid to 0 */ + tmp = regCP_MQD_CONTROL_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = regCP_HQD_PQ_CONTROL_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(prop->queue_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + mqd->cp_hqd_pq_control = tmp; + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = prop->rptr_gpu_addr; + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = prop->wptr_gpu_addr; + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + tmp = 0; + /* enable the doorbell if requested */ + if (prop->use_doorbell) { + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, prop->doorbell_index); + + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x63); + mqd->cp_hqd_persistent_state = tmp; + + /* set MIN_IB_AVAIL_SIZE */ + tmp = regCP_HQD_IB_CONTROL_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 1); + mqd->cp_hqd_ib_control = tmp; + + /* set static priority for a compute queue/ring */ + mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; + mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; + + mqd->cp_mqd_stride_size = prop->mqd_stride_size ? prop->mqd_stride_size : + sizeof(struct v12_1_compute_mqd); + + mqd->cp_hqd_active = prop->hqd_active; + + return 0; +} + +static int gfx_v12_1_xcc_kiq_init_register(struct amdgpu_ring *ring, + int xcc_id) +{ + struct amdgpu_device *adev = ring->adev; + struct v12_1_compute_mqd *mqd = ring->mqd_ptr; + int j; + + /* inactivate the queue */ + if (amdgpu_sriov_vf(adev)) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0); + + /* disable wptr polling */ + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0); + + /* write the EOP addr */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR, + mqd->cp_hqd_eop_base_addr_lo); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR_HI, + mqd->cp_hqd_eop_base_addr_hi); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL, + mqd->cp_hqd_eop_control); + + /* enable doorbell? */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, + mqd->cp_hqd_dequeue_request); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, + mqd->cp_hqd_pq_rptr); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + } + + /* set the pointer to the MQD */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, + mqd->cp_mqd_base_addr_lo); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, + mqd->cp_mqd_base_addr_hi); + + /* set MQD vmid to 0 */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, + mqd->cp_mqd_control); + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, + mqd->cp_hqd_pq_base_lo); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, + mqd->cp_hqd_pq_base_hi); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, + mqd->cp_hqd_pq_control); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR, + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, + mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, + mqd->cp_hqd_pq_wptr_poll_addr_hi); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + + /* set the vmid for the queue */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, mqd->cp_hqd_vmid); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, + mqd->cp_hqd_persistent_state); + + /* activate the queue */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, + mqd->cp_hqd_active); + + if (ring->use_doorbell) + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_STATUS, DOORBELL_ENABLE, 1); + + return 0; +} + +static int gfx_v12_1_xcc_kiq_init_queue(struct amdgpu_ring *ring, + int xcc_id) +{ + struct amdgpu_device *adev = ring->adev; + struct v12_1_compute_mqd *mqd = ring->mqd_ptr; + + gfx_v12_1_xcc_kiq_setting(ring, xcc_id); + + if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.kiq[xcc_id].mqd_backup) + memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(*mqd)); + + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + + mutex_lock(&adev->srbm_mutex); + soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); + gfx_v12_1_xcc_kiq_init_register(ring, xcc_id); + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + } else { + memset((void *)mqd, 0, sizeof(*mqd)); + if (amdgpu_sriov_vf(adev) && adev->in_suspend) + amdgpu_ring_clear_ring(ring); + mutex_lock(&adev->srbm_mutex); + soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); + amdgpu_ring_init_mqd(ring); + gfx_v12_1_xcc_kiq_init_register(ring, xcc_id); + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.kiq[xcc_id].mqd_backup) + memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(*mqd)); + } + + return 0; +} + +static int gfx_v12_1_xcc_kcq_init_queue(struct amdgpu_ring *ring, + int xcc_id) +{ + struct amdgpu_device *adev = ring->adev; + struct v12_1_compute_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.compute_ring[0]; + + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); + amdgpu_ring_init_mqd(ring); + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else { + /* restore MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + /* reset ring buffer */ + ring->wptr = 0; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); + amdgpu_ring_clear_ring(ring); + } + + return 0; +} + +static int gfx_v12_1_xcc_kiq_resume(struct amdgpu_device *adev, + int xcc_id) +{ + struct amdgpu_ring *ring; + int r; + + ring = &adev->gfx.kiq[xcc_id].ring; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (unlikely(r != 0)) { + amdgpu_bo_unreserve(ring->mqd_obj); + return r; + } + + gfx_v12_1_xcc_kiq_init_queue(ring, xcc_id); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + amdgpu_bo_unreserve(ring->mqd_obj); + ring->sched.ready = true; + return 0; +} + +static int gfx_v12_1_xcc_kcq_resume(struct amdgpu_device *adev, + int xcc_id) +{ + struct amdgpu_ring *ring = NULL; + int r = 0, i; + + if (!amdgpu_async_gfx_ring) + gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id); + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v12_1_xcc_kcq_init_queue(ring, xcc_id); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + } + + r = amdgpu_gfx_enable_kcq(adev, xcc_id); +done: + return r; +} + +static int gfx_v12_1_xcc_cp_resume(struct amdgpu_device *adev, uint16_t xcc_mask) +{ + int r, i, xcc_id; + struct amdgpu_ring *ring; + + for_each_inst(xcc_id, xcc_mask) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + /* legacy firmware loading */ + r = gfx_v12_1_xcc_cp_compute_load_microcode_rs64(adev, xcc_id); + if (r) + return r; + } + + /* GFX CGCG and LS is set by default */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) + gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, true, xcc_id); + + gfx_v12_1_xcc_cp_set_doorbell_range(adev, xcc_id); + + gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id); + + if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) + r = amdgpu_mes_kiq_hw_init(adev, xcc_id); + else + r = gfx_v12_1_xcc_kiq_resume(adev, xcc_id); + if (r) + return r; + + r = gfx_v12_1_xcc_kcq_resume(adev, xcc_id); + if (r) + return r; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + } + + return 0; +} + +static int gfx_v12_1_cp_resume(struct amdgpu_device *adev) +{ + int num_xcc, num_xcp, num_xcc_per_xcp; + uint16_t xcc_mask; + int r = 0; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + if (amdgpu_sriov_vf(adev)) { + enum amdgpu_gfx_partition mode; + + mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr, + AMDGPU_XCP_FL_NONE); + if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) + return -EINVAL; + if (adev->gfx.funcs && + adev->gfx.funcs->get_xccs_per_xcp) { + num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev); + adev->gfx.num_xcc_per_xcp = num_xcc_per_xcp; + num_xcp = num_xcc / num_xcc_per_xcp; + } else { + return -EINVAL; + } + r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode); + + } else { + if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, + AMDGPU_XCP_FL_NONE) == + AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) + r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, + amdgpu_user_partt_mode); + } + + if (r) + return r; + + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); + + return gfx_v12_1_xcc_cp_resume(adev, xcc_mask); +} + +static int gfx_v12_1_gfxhub_enable(struct amdgpu_device *adev) +{ + int r, i; + bool value; + + r = adev->gfxhub.funcs->gart_enable(adev); + if (r) + return r; + + value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? + false : true; + + adev->gfxhub.funcs->set_fault_enable_default(adev, value); + /* TODO investigate why TLB flush is needed, + * are we missing a flush somewhere else? */ + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { + if (AMDGPU_IS_GFXHUB(i)) + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(i), 0); + } + + return 0; +} + +static int get_gb_addr_config(struct amdgpu_device *adev) +{ + u32 gb_addr_config; + + gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG_READ); + if (gb_addr_config == 0) + return -EINVAL; + + adev->gfx.config.gb_addr_config_fields.num_pkrs = + 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG_READ, NUM_PKRS); + + adev->gfx.config.gb_addr_config = gb_addr_config; + + adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG_READ, NUM_PIPES); + + adev->gfx.config.max_tile_pipes = + adev->gfx.config.gb_addr_config_fields.num_pipes; + + adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG_READ, MAX_COMPRESSED_FRAGS); + adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG_READ, NUM_RB_PER_SE); + adev->gfx.config.gb_addr_config_fields.num_se = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG_READ, NUM_SHADER_ENGINES); + adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG_READ, PIPE_INTERLEAVE_SIZE)); + + return 0; +} + +static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t data; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG); + data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data); + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPG_PSP_DEBUG); + data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPG_PSP_DEBUG, data); +} + +static void gfx_v12_1_xcc_setup_tcp_thrashing_ctrl(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t val; + + /* Set the TCP UTCL0 register to enable atomics */ + val = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regTCP_UTCL0_THRASHING_CTRL); + val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL, THRASHING_EN, 0x2); + val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL, + RETRY_FRAGMENT_THRESHOLD_UP_EN, 0x1); + val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL, + RETRY_FRAGMENT_THRESHOLD_DOWN_EN, 0x1); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regTCP_UTCL0_THRASHING_CTRL, val); +} + +static void gfx_v12_1_xcc_enable_atomics(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t data; + + /* Set the TCP UTCL0 register to enable atomics */ + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_UTCL0_CNTL1); + data = REG_SET_FIELD(data, TCP_UTCL0_CNTL1, ATOMIC_REQUESTER_EN, 0x1); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_UTCL0_CNTL1, data); +} + +static void gfx_v12_1_xcc_disable_burst(struct amdgpu_device *adev, + int xcc_id) +{ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGL1_DRAM_BURST_CTRL, 0xf); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGLARB_DRAM_BURST_CTRL, 0xf); +} + +static void gfx_v12_1_xcc_disable_early_write_ack(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t data; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL3); + data = REG_SET_FIELD(data, TCP_CNTL3, DISABLE_EARLY_WRITE_ACK, 0x1); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL3, data); +} + +static void gfx_v12_1_xcc_disable_tcp_spill_cache(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t data; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL); + data = REG_SET_FIELD(data, TCP_CNTL, TCP_SPILL_CACHE_DISABLE, 0x1); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL, data); +} + +static void gfx_v12_1_init_golden_registers(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < NUM_XCC(adev->gfx.xcc_mask); i++) { + gfx_v12_1_xcc_disable_burst(adev, i); + gfx_v12_1_xcc_enable_atomics(adev, i); + gfx_v12_1_xcc_setup_tcp_thrashing_ctrl(adev, i); + gfx_v12_1_xcc_disable_early_write_ack(adev, i); + gfx_v12_1_xcc_disable_tcp_spill_cache(adev, i); + } +} + +static int gfx_v12_1_hw_init(struct amdgpu_ip_block *ip_block) +{ + int r, i, num_xcc; + struct amdgpu_device *adev = ip_block->adev; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + /* rlc autoload firmware */ + r = gfx_v12_1_rlc_backdoor_autoload_enable(adev); + if (r) + return r; + } else { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + if (adev->gfx.imu.funcs) { + if (adev->gfx.imu.funcs->load_microcode) + adev->gfx.imu.funcs->load_microcode(adev); + } + + for (i = 0; i < num_xcc; i++) { + /* disable gpa mode in backdoor loading */ + gfx_v12_1_xcc_disable_gpa_mode(adev, i); + } + } + } + + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) || + (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + r = gfx_v12_1_wait_for_rlc_autoload_complete(adev); + if (r) { + dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r); + return r; + } + } + + adev->gfx.is_poweron = true; + + if (get_gb_addr_config(adev)) + DRM_WARN("Invalid gb_addr_config !\n"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) + gfx_v12_1_config_gfx_rs64(adev); + + r = gfx_v12_1_gfxhub_enable(adev); + if (r) + return r; + + gfx_v12_1_init_golden_registers(adev); + + gfx_v12_1_constants_init(adev); + + if (adev->nbio.funcs->gc_doorbell_init) + adev->nbio.funcs->gc_doorbell_init(adev); + + r = gfx_v12_1_rlc_resume(adev); + if (r) + return r; + + /* + * init golden registers and rlc resume may override some registers, + * reconfig them here + */ + gfx_v12_1_tcp_harvest(adev); + + r = gfx_v12_1_cp_resume(adev); + if (r) + return r; + + return r; +} + +static void gfx_v12_1_xcc_fini(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t tmp; + + if (!adev->no_hw_access) { + if (amdgpu_gfx_disable_kcq(adev, xcc_id)) + DRM_ERROR("KCQ disable failed\n"); + + amdgpu_mes_kiq_hw_fini(adev, xcc_id); + } + + if (amdgpu_sriov_vf(adev)) { + /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); + } + gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id); + gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id); +} + +static int gfx_v12_1_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, num_xcc; + + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + gfx_v12_1_xcc_fini(adev, i); + } + + adev->gfxhub.funcs->gart_disable(adev); + + adev->gfx.is_poweron = false; + + return 0; +} + +static int gfx_v12_1_suspend(struct amdgpu_ip_block *ip_block) +{ + return gfx_v12_1_hw_fini(ip_block); +} + +static int gfx_v12_1_resume(struct amdgpu_ip_block *ip_block) +{ + return gfx_v12_1_hw_init(ip_block); +} + +static bool gfx_v12_1_is_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + if (REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, i), + regGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) + return false; + } + return true; +} + +static int gfx_v12_1_wait_for_idle(struct amdgpu_ip_block *ip_block) +{ + unsigned i; + struct amdgpu_device *adev = ip_block->adev; + + for (i = 0; i < adev->usec_timeout; i++) { + if (gfx_v12_1_is_idle(ip_block)) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + uint64_t clock = 0; + + if (adev->smuio.funcs && + adev->smuio.funcs->get_gpu_clock_counter) + clock = adev->smuio.funcs->get_gpu_clock_counter(adev); + else + dev_warn(adev->dev, "query gpu clock counter is not supported\n"); + + return clock; +} + +static int gfx_v12_1_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + adev->gfx.funcs = &gfx_v12_1_gfx_funcs; + + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + + gfx_v12_1_set_kiq_pm4_funcs(adev); + gfx_v12_1_set_ring_funcs(adev); + gfx_v12_1_set_irq_funcs(adev); + gfx_v12_1_set_rlc_funcs(adev); + gfx_v12_1_set_mqd_funcs(adev); + gfx_v12_1_set_imu_funcs(adev); + + gfx_v12_1_init_rlcg_reg_access_ctrl(adev); + + return gfx_v12_1_init_microcode(adev); +} + +static int gfx_v12_1_late_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); + if (r) + return r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); + if (r) + return r; + + return 0; +} + +static bool gfx_v12_1_is_rlc_enabled(struct amdgpu_device *adev) +{ + uint32_t rlc_cntl; + + /* if RLC is not enabled, do nothing */ + rlc_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CNTL); + return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; +} + +static void gfx_v12_1_xcc_set_safe_mode(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t data; + unsigned i; + + data = RLC_SAFE_MODE__CMD_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data); + + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + break; + udelay(1); + } +} + +static void gfx_v12_1_xcc_unset_safe_mode(struct amdgpu_device *adev, + int xcc_id) +{ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); +} + +static void gfx_v12_1_update_perf_clk(struct amdgpu_device *adev, + bool enable) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) + gfx_v12_1_xcc_update_perf_clk(adev, enable, i); +} + +static void gfx_v12_1_update_spm_vmid(struct amdgpu_device *adev, + int xcc_id, + struct amdgpu_ring *ring, + unsigned vmid) +{ + u32 reg, data; + + reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL); + if (amdgpu_sriov_is_pp_one_vf(adev)) + data = RREG32_NO_KIQ(reg); + else + data = RREG32(reg); + + data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; + data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; + + if (amdgpu_sriov_is_pp_one_vf(adev)) + WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL, data); + else + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL, data); + + if (ring + && amdgpu_sriov_is_pp_one_vf(adev) + && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) + || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { + uint32_t reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL); + amdgpu_ring_emit_wreg(ring, reg, data); + } +} + +static const struct amdgpu_rlc_funcs gfx_v12_1_rlc_funcs = { + .is_rlc_enabled = gfx_v12_1_is_rlc_enabled, + .set_safe_mode = gfx_v12_1_xcc_set_safe_mode, + .unset_safe_mode = gfx_v12_1_xcc_unset_safe_mode, + .init = gfx_v12_1_rlc_init, + .get_csb_size = gfx_v12_1_get_csb_size, + .get_csb_buffer = gfx_v12_1_get_csb_buffer, + .resume = gfx_v12_1_rlc_resume, + .stop = gfx_v12_1_rlc_stop, + .reset = gfx_v12_1_rlc_reset, + .start = gfx_v12_1_rlc_start, + .update_spm_vmid = gfx_v12_1_update_spm_vmid, +}; + +#if 0 +static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable) +{ + /* TODO */ +} + +static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable) +{ + /* TODO */ +} +#endif + +static int gfx_v12_1_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + bool enable = (state == AMD_PG_STATE_GATE); + + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 1, 0): + amdgpu_gfx_off_ctrl(adev, enable); + break; + default: + break; + } + + return 0; +} + +static void gfx_v12_1_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t def, data; + + if (!(adev->cg_flags & + (AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS))) + return; + + if (enable) { + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CGTT_MGCG_OVERRIDE); + + /* unset CGCG override */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || + adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + + /* update CGCG override bits */ + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CGTT_MGCG_OVERRIDE, data); + + /* enable cgcg FSM(0x0000363F) */ + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { + data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; + data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + } + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { + data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; + data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + } + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CGCG_CGLS_CTRL, data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL); + + data &= ~CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK; + data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; + data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data); + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL); + data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); + data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); + data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); + data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL, data); + } else { + /* Program RLC_CGCG_CGLS_CTRL */ + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data); + } +} + +static void gfx_v12_1_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t data, def; + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) + return; + + /* It is disabled by HW by default */ + if (enable) { + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { + /* 1 - RLC_CGTT_MGCG_OVERRIDE */ + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); + + data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); + } + } else { + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); + + data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); + } + } +} + +static void gfx_v12_1_xcc_update_repeater_fgcg(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) + return; + + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); + + if (enable) + data &= ~(RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK); + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); +} + +static void gfx_v12_1_xcc_update_sram_fgcg(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) + return; + + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); + + if (enable) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); +} + +static void gfx_v12_1_xcc_update_perf_clk(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) + return; + + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); + + if (enable) + data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); +} + +static int gfx_v12_1_xcc_update_gfx_clock_gating(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); + + gfx_v12_1_xcc_update_coarse_grain_clock_gating(adev, enable, xcc_id); + + gfx_v12_1_xcc_update_medium_grain_clock_gating(adev, enable, xcc_id); + + gfx_v12_1_xcc_update_repeater_fgcg(adev, enable, xcc_id); + + gfx_v12_1_xcc_update_sram_fgcg(adev, enable, xcc_id); + + gfx_v12_1_xcc_update_perf_clk(adev, enable, xcc_id); + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS)) + gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, enable, xcc_id); + + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); + + return 0; +} + +static int gfx_v12_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, num_xcc; + + if (amdgpu_sriov_vf(adev)) + return 0; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(12, 1, 0): + for (i = 0; i < num_xcc; i++) + gfx_v12_1_xcc_update_gfx_clock_gating(adev, + state == AMD_CG_STATE_GATE, i); + break; + default: + break; + } + + return 0; +} + +static void gfx_v12_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) +{ + struct amdgpu_device *adev = ip_block->adev; + int data; + + /* AMD_CG_SUPPORT_GFX_MGCG */ + data = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CGTT_MGCG_OVERRIDE); + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_MGCG; + + /* AMD_CG_SUPPORT_REPEATER_FGCG */ + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; + + /* AMD_CG_SUPPORT_GFX_FGCG */ + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_FGCG; + + /* AMD_CG_SUPPORT_GFX_PERF_CLK */ + if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; + + /* AMD_CG_SUPPORT_GFX_CGCG */ + data = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CGCG_CGLS_CTRL); + if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGCG; + + /* AMD_CG_SUPPORT_GFX_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGLS; +} + +static u64 gfx_v12_1_ring_get_rptr_compute(struct amdgpu_ring *ring) +{ + /* gfx12 hardware is 32bit rptr */ + return *(uint32_t *)ring->rptr_cpu_addr; +} + +static u64 gfx_v12_1_ring_get_wptr_compute(struct amdgpu_ring *ring) +{ + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); + else + BUG(); + return wptr; +} + +static void gfx_v12_1_ring_set_wptr_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); /* only DOORBELL method supported on gfx12 now */ + } +} + +static void gfx_v12_1_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +static void gfx_v12_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + + /* RELEASE_MEM - flush caches, send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); + amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ(1) | + PACKET3_RELEASE_MEM_GCR_GLV_WB | + PACKET3_RELEASE_MEM_GCR_GL2_WB | + PACKET3_RELEASE_MEM_GCR_GL2_SCOPE(2) | + PACKET3_RELEASE_MEM_TEMPORAL(3) | + PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + PACKET3_RELEASE_MEM_EVENT_INDEX(5))); + amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | + PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); + + /* + * the address should be Qword aligned if 64bit write, Dword + * aligned if only send 32bit data low (discard data high) + */ + if (write64bit) + BUG_ON(addr & 0x7); + else + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v12_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + gfx_v12_1_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), + upper_32_bits(addr), seq, 0xffffffff, 4); +} + +static void gfx_v12_1_ring_invalidate_tlbs(struct amdgpu_ring *ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint8_t dst_sel) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + +static void gfx_v12_1_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* compute doesn't have PFP */ + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + amdgpu_ring_write(ring, 0x0); + } +} + +static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned int flags) +{ + struct amdgpu_device *adev = ring->adev; + + /* we only allocate 32bit for each seq wb address */ + BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + /* write fence seq to the "addr" */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + if (flags & AMDGPU_FENCE_FLAG_INT) { + /* set register to trigger INT */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); + amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ + } +} + +static void gfx_v12_1_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t reg_val_offs) +{ + struct amdgpu_device *adev = ring->adev; + + reg = soc_v1_0_normalize_xcc_reg_offset(reg); + + amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); + amdgpu_ring_write(ring, 0 | /* src: register*/ + (5 << 8) | /* dst: memory */ + (1 << 20)); /* write confirm */ + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + + reg_val_offs * 4)); + amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + + reg_val_offs * 4)); +} + +static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, + uint32_t val) +{ + uint32_t cmd = 0; + + reg = soc_v1_0_normalize_xcc_reg_offset(reg); + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_KIQ: + cmd = (1 << 16); /* no inc addr */ + break; + default: + cmd = WR_CONFIRM; + break; + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, cmd); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v12_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v12_1_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + +static void gfx_v12_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + + gfx_v12_1_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); +} + +static void gfx_v12_1_xcc_set_compute_eop_interrupt_state(struct amdgpu_device *adev, + int me, int pipe, + enum amdgpu_interrupt_state state, + int xcc_id) +{ + u32 mec_int_cntl, mec_int_cntl_reg; + + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + + if (me == 1) { + switch (pipe) { + case 0: + mec_int_cntl_reg = SOC15_REG_OFFSET( + GC, GET_INST(GC, xcc_id), + regCP_ME1_PIPE0_INT_CNTL); + break; + case 1: + mec_int_cntl_reg = SOC15_REG_OFFSET( + GC, GET_INST(GC, xcc_id), + regCP_ME1_PIPE1_INT_CNTL); + break; + case 2: + mec_int_cntl_reg = SOC15_REG_OFFSET( + GC, GET_INST(GC, xcc_id), + regCP_ME1_PIPE2_INT_CNTL); + break; + case 3: + mec_int_cntl_reg = SOC15_REG_OFFSET( + GC, GET_INST(GC, xcc_id), + regCP_ME1_PIPE3_INT_CNTL); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); + return; + } + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 0); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + GENERIC0_INT_ENABLE, 0); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 1); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + GENERIC0_INT_ENABLE, 1); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); + break; + default: + break; + } +} + +static int gfx_v12_1_set_eop_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + switch (type) { + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: + gfx_v12_1_xcc_set_compute_eop_interrupt_state( + adev, 1, 0, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: + gfx_v12_1_xcc_set_compute_eop_interrupt_state( + adev, 1, 1, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: + gfx_v12_1_xcc_set_compute_eop_interrupt_state( + adev, 1, 2, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: + gfx_v12_1_xcc_set_compute_eop_interrupt_state( + adev, 1, 3, state, i); + break; + default: + break; + } + } + + return 0; +} + +static int gfx_v12_1_eop_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + int i, xcc_id; + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + uint32_t mes_queue_id = entry->src_data[0]; + + DRM_DEBUG("IH: CP EOP\n"); + + if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { + struct amdgpu_mes_queue *queue; + + mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; + + spin_lock(&adev->mes.queue_id_lock); + queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); + if (queue) { + DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); + amdgpu_fence_process(queue->ring); + } + spin_unlock(&adev->mes.queue_id_lock); + } else { + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + xcc_id = gfx_v12_1_ih_to_xcc_inst(adev, entry->node_id); + + if (xcc_id == -EINVAL) + return -EINVAL; + + switch (me_id) { + case 0: + if (pipe_id == 0) + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + else + amdgpu_fence_process(&adev->gfx.gfx_ring[1]); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring + [i + + xcc_id * adev->gfx.num_compute_rings]; + /* Per-queue interrupt is supported for MEC starting from VI. + * The interrupt can only be enabled/disabled per pipe instead + * of per queue. + */ + if ((ring->me == me_id) && + (ring->pipe == pipe_id) && + (ring->queue == queue_id)) + amdgpu_fence_process(ring); + } + break; + } + } + + return 0; +} + +static int gfx_v12_1_set_priv_reg_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < num_xcc; i++) + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + break; + default: + break; + } + + return 0; +} + +static int gfx_v12_1_set_priv_inst_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < num_xcc; i++) + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + break; + default: + break; + } + + return 0; +} + +static void gfx_v12_1_handle_priv_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + int i, xcc_id; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + xcc_id = gfx_v12_1_ih_to_xcc_inst(adev, entry->node_id); + + if (xcc_id == -EINVAL) + return; + + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + /* we only enabled 1 gfx queue per pipe for now */ + if (ring->me == me_id && ring->pipe == pipe_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring + [i + + xcc_id * adev->gfx.num_compute_rings]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; + } +} + +static int gfx_v12_1_priv_reg_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal register access in command stream\n"); + gfx_v12_1_handle_priv_fault(adev, entry); + return 0; +} + +static int gfx_v12_1_priv_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal instruction in command stream\n"); + gfx_v12_1_handle_priv_fault(adev, entry); + return 0; +} + +static void gfx_v12_1_emit_mem_sync(struct amdgpu_ring *ring) +{ + const unsigned int gcr_cntl = + PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_SCOPE(2); + + /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); + amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ + amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ +} + +static const struct amd_ip_funcs gfx_v12_1_ip_funcs = { + .name = "gfx_v12_1", + .early_init = gfx_v12_1_early_init, + .late_init = gfx_v12_1_late_init, + .sw_init = gfx_v12_1_sw_init, + .sw_fini = gfx_v12_1_sw_fini, + .hw_init = gfx_v12_1_hw_init, + .hw_fini = gfx_v12_1_hw_fini, + .suspend = gfx_v12_1_suspend, + .resume = gfx_v12_1_resume, + .is_idle = gfx_v12_1_is_idle, + .wait_for_idle = gfx_v12_1_wait_for_idle, + .set_clockgating_state = gfx_v12_1_set_clockgating_state, + .set_powergating_state = gfx_v12_1_set_powergating_state, + .get_clockgating_state = gfx_v12_1_get_clockgating_state, +}; + +static const struct amdgpu_ring_funcs gfx_v12_1_ring_funcs_compute = { + .type = AMDGPU_RING_TYPE_COMPUTE, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v12_1_ring_get_rptr_compute, + .get_wptr = gfx_v12_1_ring_get_wptr_compute, + .set_wptr = gfx_v12_1_ring_set_wptr_compute, + .emit_frame_size = + 7 + /* gfx_v12_1_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v12_1_ring_emit_vm_flush */ + 8 + 8 + 8 + /* gfx_v12_1_ring_emit_fence x3 for user fence, vm fence */ + 8, /* gfx_v12_1_emit_mem_sync */ + .emit_ib_size = 7, /* gfx_v12_1_ring_emit_ib_compute */ + .emit_ib = gfx_v12_1_ring_emit_ib_compute, + .emit_fence = gfx_v12_1_ring_emit_fence, + .emit_pipeline_sync = gfx_v12_1_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v12_1_ring_emit_vm_flush, + .test_ring = gfx_v12_1_ring_test_ring, + .test_ib = gfx_v12_1_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_wreg = gfx_v12_1_ring_emit_wreg, + .emit_reg_wait = gfx_v12_1_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v12_1_ring_emit_reg_write_reg_wait, + .emit_mem_sync = gfx_v12_1_emit_mem_sync, +}; + +static const struct amdgpu_ring_funcs gfx_v12_1_ring_funcs_kiq = { + .type = AMDGPU_RING_TYPE_KIQ, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v12_1_ring_get_rptr_compute, + .get_wptr = gfx_v12_1_ring_get_wptr_compute, + .set_wptr = gfx_v12_1_ring_set_wptr_compute, + .emit_frame_size = + 7 + /* gfx_v12_1_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v12_1_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v12_1_ring_emit_fence_kiq x3 for user fence, vm fence */ + .emit_ib_size = 7, /* gfx_v12_1_ring_emit_ib_compute */ + .emit_ib = gfx_v12_1_ring_emit_ib_compute, + .emit_fence = gfx_v12_1_ring_emit_fence_kiq, + .test_ring = gfx_v12_1_ring_test_ring, + .test_ib = gfx_v12_1_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_rreg = gfx_v12_1_ring_emit_rreg, + .emit_wreg = gfx_v12_1_ring_emit_wreg, + .emit_reg_wait = gfx_v12_1_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v12_1_ring_emit_reg_write_reg_wait, +}; + +static void gfx_v12_1_set_ring_funcs(struct amdgpu_device *adev) +{ + int i, j, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + adev->gfx.kiq[i].ring.funcs = &gfx_v12_1_ring_funcs_kiq; + + for (j = 0; j < adev->gfx.num_compute_rings; j++) + adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings].funcs = + &gfx_v12_1_ring_funcs_compute; + } +} + +static const struct amdgpu_irq_src_funcs gfx_v12_1_eop_irq_funcs = { + .set = gfx_v12_1_set_eop_interrupt_state, + .process = gfx_v12_1_eop_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_reg_irq_funcs = { + .set = gfx_v12_1_set_priv_reg_fault_state, + .process = gfx_v12_1_priv_reg_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_inst_irq_funcs = { + .set = gfx_v12_1_set_priv_inst_fault_state, + .process = gfx_v12_1_priv_inst_irq, +}; + +static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; + adev->gfx.eop_irq.funcs = &gfx_v12_1_eop_irq_funcs; + + adev->gfx.priv_reg_irq.num_types = 1; + adev->gfx.priv_reg_irq.funcs = &gfx_v12_1_priv_reg_irq_funcs; + + adev->gfx.priv_inst_irq.num_types = 1; + adev->gfx.priv_inst_irq.funcs = &gfx_v12_1_priv_inst_irq_funcs; +} + +static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev) +{ + if (adev->flags & AMD_IS_APU) + adev->gfx.imu.mode = MISSION_MODE; + else + adev->gfx.imu.mode = DEBUG_MODE; + if (!amdgpu_sriov_vf(adev)) + adev->gfx.imu.funcs = &gfx_v12_1_imu_funcs; +} + +static void gfx_v12_1_set_rlc_funcs(struct amdgpu_device *adev) +{ + adev->gfx.rlc.funcs = &gfx_v12_1_rlc_funcs; +} + +static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev) +{ + /* set compute eng mqd */ + adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size = + sizeof(struct v12_1_compute_mqd); + adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = + gfx_v12_1_compute_mqd_init; +} + +static void gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(struct amdgpu_device *adev, + u32 bitmap, int xcc_id) +{ + u32 data; + + if (!bitmap) + return; + + data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data); +} + +static u32 gfx_v12_1_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev, + int xcc_id) +{ + u32 data, mask; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG); + + data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); + + return (~data) & mask; +} + +static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info) +{ + int i, j, k, counter, xcc_id, active_cu_number = 0; + u32 mask, bitmap; + unsigned int disable_masks[2 * 2]; + + if (!adev || !cu_info) + return -EINVAL; + + if (adev->gfx.config.max_shader_engines > 2 || + adev->gfx.config.max_sh_per_se > 2) { + dev_err(adev->dev, + "Max SE (%d) and Max SA per SE (%d) is greater than expected\n", + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se); + return -EINVAL; + } + + amdgpu_gfx_parse_disable_cu(adev, disable_masks, + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se); + + mutex_lock(&adev->grbm_idx_mutex); + for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + bitmap = i * adev->gfx.config.max_sh_per_se + j; + if (!((gfx_v12_1_get_sa_active_bitmap(adev, xcc_id) >> bitmap) & 1)) + continue; + mask = 1; + counter = 0; + gfx_v12_1_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id); + gfx_v12_1_set_user_cu_inactive_bitmap_per_sh( + adev, + disable_masks[i * adev->gfx.config.max_sh_per_se + j], + xcc_id); + bitmap = gfx_v12_1_get_cu_active_bitmap_per_sh(adev, xcc_id); + + cu_info->bitmap[xcc_id][i][j] = bitmap; + + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { + if (bitmap & mask) + counter++; + + mask <<= 1; + } + active_cu_number += counter; + } + } + gfx_v12_1_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); + } + mutex_unlock(&adev->grbm_idx_mutex); + + cu_info->number = active_cu_number; + cu_info->simd_per_cu = NUM_SIMD_PER_CU_GFX12_1; + cu_info->lds_size = 320; + + return 0; +} + +const struct amdgpu_ip_block_version gfx_v12_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 12, + .minor = 1, + .rev = 0, + .funcs = &gfx_v12_1_ip_funcs, +}; + +static int gfx_v12_1_xcp_resume(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t tmp_mask; + int i, r; + + /* TODO : Initialize golden regs */ + /* gfx_v12_1_init_golden_registers(adev); */ + + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) + gfx_v12_1_xcc_constants_init(adev, i); + + if (!amdgpu_sriov_vf(adev)) { + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) { + r = gfx_v12_1_xcc_rlc_resume(adev, i); + if (r) + return r; + } + } + + r = gfx_v12_1_xcc_cp_resume(adev, inst_mask); + + return r; +} + +static int gfx_v12_1_xcp_suspend(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + for_each_inst(i, inst_mask) + gfx_v12_1_xcc_fini(adev, i); + + return 0; +} + +struct amdgpu_xcp_ip_funcs gfx_v12_1_xcp_funcs = { + .suspend = &gfx_v12_1_xcp_suspend, + .resume = &gfx_v12_1_xcp_resume +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.h new file mode 100644 index 000000000000..684d93a1e2f8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.h @@ -0,0 +1,31 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFX_V12_1_H__ +#define __GFX_V12_1_H__ + +extern const struct amdgpu_ip_block_version gfx_v12_1_ip_block; + +extern struct amdgpu_xcp_ip_funcs gfx_v12_1_xcp_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h new file mode 100644 index 000000000000..9a5c82c8db53 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h @@ -0,0 +1,475 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef GFX_V12_1_PKT_H +#define GFX_V12_1_PKT_H + +/** + * PM4 definitions + */ +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) +#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ + ((reg) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) + +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) + +#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) + +#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) + +/* Packet 3 types */ +#define PACKET3_NOP 0x10 +#define PACKET3_SET_BASE 0x11 +#define PACKET3_BASE_INDEX(x) ((x) << 0) +#define CE_PARTITION_BASE 3 +#define PACKET3_CLEAR_STATE 0x12 +#define PACKET3_INDEX_BUFFER_SIZE 0x13 +#define PACKET3_DISPATCH_DIRECT 0x15 +#define PACKET3_DISPATCH_INDIRECT 0x16 +#define PACKET3_INDIRECT_BUFFER_END 0x17 +#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19 +#define PACKET3_ATOMIC_GDS 0x1D +#define PACKET3_ATOMIC_MEM 0x1E +#define PACKET3_OCCLUSION_QUERY 0x1F +#define PACKET3_SET_PREDICATION 0x20 +#define PACKET3_REG_RMW 0x21 +#define PACKET3_COND_EXEC 0x22 +#define PACKET3_PRED_EXEC 0x23 +#define PACKET3_DRAW_INDIRECT 0x24 +#define PACKET3_DRAW_INDEX_INDIRECT 0x25 +#define PACKET3_INDEX_BASE 0x26 +#define PACKET3_DRAW_INDEX_2 0x27 +#define PACKET3_CONTEXT_CONTROL 0x28 +#define PACKET3_INDEX_TYPE 0x2A +#define PACKET3_DRAW_INDIRECT_MULTI 0x2C +#define PACKET3_DRAW_INDEX_AUTO 0x2D +#define PACKET3_NUM_INSTANCES 0x2F +#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 +#define PACKET3_INDIRECT_BUFFER_PRIV 0x32 +#define PACKET3_INDIRECT_BUFFER_CNST 0x33 +#define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33 +#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 +#define PACKET3_DRAW_INDEX_OFFSET_2 0x35 +#define PACKET3_DRAW_PREAMBLE 0x36 +#define PACKET3_WRITE_DATA 0x37 +#define WRITE_DATA_DST_SEL(x) ((x) << 8) + /* 0 - register + * 1 - memory (sync - via GRBM) + * 2 - gl2 + * 3 - gds + * 4 - reserved + * 5 - memory (async - direct) + */ +#define WR_ONE_ADDR (1 << 16) +#define WR_CONFIRM (1 << 20) +#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) + /* 0 - LRU + * 1 - Stream + */ +#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) + /* 0 - me + * 1 - pfp + * 2 - ce + */ +#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 +#define PACKET3_MEM_SEMAPHORE 0x39 +# define PACKET3_SEM_USE_MAILBOX (0x1 << 16) +# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */ +# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) +# define PACKET3_SEM_SEL_WAIT (0x7 << 29) +#define PACKET3_DRAW_INDEX_MULTI_INST 0x3A +#define PACKET3_COPY_DW 0x3B +#define PACKET3_WAIT_REG_MEM 0x3C +#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) + /* 0 - always + * 1 - < + * 2 - <= + * 3 - == + * 4 - != + * 5 - >= + * 6 - > + */ +#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) + /* 0 - reg + * 1 - mem + */ +#define WAIT_REG_MEM_OPERATION(x) ((x) << 6) + /* 0 - wait_reg_mem + * 1 - wr_wait_wr_reg + */ +#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) + /* 0 - me + * 1 - pfp + */ +#define PACKET3_INDIRECT_BUFFER 0x3F +#define INDIRECT_BUFFER_VALID (1 << 23) +#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) + /* 0 - LRU + * 1 - Stream + * 2 - Bypass + */ +#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) +#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30) +#define PACKET3_COND_INDIRECT_BUFFER 0x3F +#define PACKET3_COPY_DATA 0x40 +#define PACKET3_CP_DMA 0x41 +#define PACKET3_PFP_SYNC_ME 0x42 +#define PACKET3_SURFACE_SYNC 0x43 +#define PACKET3_ME_INITIALIZE 0x44 +#define PACKET3_COND_WRITE 0x45 +#define PACKET3_EVENT_WRITE 0x46 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_* + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + */ +#define PACKET3_EVENT_WRITE_EOP 0x47 +#define PACKET3_EVENT_WRITE_EOS 0x48 +#define PACKET3_RELEASE_MEM 0x49 +#define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0) +#define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8) +#define PACKET3_RELEASE_MEM_GCR_GL2_SCOPE(x) ((x) << 12) +#define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14) +#define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16) +#define PACKET3_RELEASE_MEM_GCR_GL2_RANGE(x) ((x) << 17) +#define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19) +#define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20) +#define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21) +#define PACKET3_RELEASE_MEM_GCR_SEQ(x) ((x) << 22) +#define PACKET3_RELEASE_MEM_GCR_GLV_WB (1 << 24) +#define PACKET3_RELEASE_MEM_TEMPORAL(x) ((x) << 25) + /* 0 - temporal__release_mem__rt + * 1 - temporal__release_mem__nt + * 2 - temporal__release_mem__ht + * 3 - temporal__release_mem__lu + */ +#define PACKET3_RELEASE_MEM_EXECUTE (1 << 28) + +#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29) + /* 0 - discard + * 1 - send low 32bit data + * 2 - send 64bit data + * 3 - send 64bit GPU counter value + * 4 - send 64bit sys counter value + */ +#define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24) + /* 0 - none + * 1 - interrupt only (DATA_SEL = 0) + * 2 - interrupt when data write is confirmed + */ +#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16) + /* 0 - MC + * 1 - TC/L2 + */ + + + +#define PACKET3_PREAMBLE_CNTL 0x4A +# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) +# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) +#define PACKET3_DMA_DATA 0x50 +/* 1. header + * 2. CONTROL + * 3. SRC_ADDR_LO or DATA [31:0] + * 4. SRC_ADDR_HI [31:0] + * 5. DST_ADDR_LO [31:0] + * 6. DST_ADDR_HI [7:0] + * 7. COMMAND [31:26] | BYTE_COUNT [25:0] + */ +/* CONTROL */ +# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) + /* 0 - ME + * 1 - PFP + */ +# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) + /* 0 - LRU + * 1 - Stream + */ +# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) + /* 0 - DST_ADDR using DAS + * 1 - GDS + * 3 - DST_ADDR using L2 + */ +# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) + /* 0 - LRU + * 1 - Stream + */ +# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) + /* 0 - SRC_ADDR using SAS + * 1 - GDS + * 2 - DATA + * 3 - SRC_ADDR using L2 + */ +# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) +/* COMMAND */ +# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) + /* 0 - memory + * 1 - register + */ +# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) + /* 0 - memory + * 1 - register + */ +# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) +# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) +# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) +#define PACKET3_CONTEXT_REG_RMW 0x51 +#define PACKET3_GFX_CNTX_UPDATE 0x52 +#define PACKET3_BLK_CNTX_UPDATE 0x53 +#define PACKET3_INCR_UPDT_STATE 0x55 +#define PACKET3_ACQUIRE_MEM 0x58 +/* 1. HEADER + * 2. COHER_CNTL [30:0] + * 2.1 ENGINE_SEL [31:31] + * 2. COHER_SIZE [31:0] + * 3. COHER_SIZE_HI [7:0] + * 4. COHER_BASE_LO [31:0] + * 5. COHER_BASE_HI [23:0] + * 7. POLL_INTERVAL [15:0] + * 8. GCR_CNTL [18:0] + */ +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) ((x) << 0) + /* + * 0:NOP + * 1:ALL + * 2:RANGE + * 3:FIRST_LAST + */ +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x) ((x) << 2) + /* + * 0:ALL + * 1:reserved + * 2:RANGE + * 3:FIRST_LAST + */ +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_SCOPE(x) ((x) << 4) + /* + * 0:Device scope + * 1:System scope + * 2:Force INV/WB all + * 3:Reserved + */ +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_WB(x) ((x) << 6) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x) ((x) << 7) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x) ((x) << 8) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x) ((x) << 10) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x) ((x) << 11) + /* + * 0:ALL + * 1:VOL + * 2:RANGE + * 3:FIRST_LAST + */ +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x) ((x) << 13) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x) ((x) << 14) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x) ((x) << 15) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x) ((x) << 16) + /* + * 0: PARALLEL + * 1: FORWARD + * 2: REVERSE + */ +#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18) +#define PACKET3_REWIND 0x59 +#define PACKET3_INTERRUPT 0x5A +#define PACKET3_GEN_PDEPTE 0x5B +#define PACKET3_INDIRECT_BUFFER_PASID 0x5C +#define PACKET3_PRIME_UTCL2 0x5D +#define PACKET3_LOAD_UCONFIG_REG 0x5E +#define PACKET3_LOAD_SH_REG 0x5F +#define PACKET3_LOAD_CONFIG_REG 0x60 +#define PACKET3_LOAD_CONTEXT_REG 0x61 +#define PACKET3_LOAD_COMPUTE_STATE 0x62 +#define PACKET3_LOAD_SH_REG_INDEX 0x63 +#define PACKET3_SET_CONFIG_REG 0x68 +#define PACKET3_SET_CONFIG_REG_START 0x00002000 +#define PACKET3_SET_CONFIG_REG_END 0x00002c00 +#define PACKET3_SET_CONTEXT_REG 0x69 +#define PACKET3_SET_CONTEXT_REG_START 0x0000a000 +#define PACKET3_SET_CONTEXT_REG_END 0x0000a400 +#define PACKET3_SET_CONTEXT_REG_INDEX 0x6A +#define PACKET3_SET_VGPR_REG_DI_MULTI 0x71 +#define PACKET3_SET_SH_REG_DI 0x72 +#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 +#define PACKET3_SET_SH_REG_DI_MULTI 0x74 +#define PACKET3_GFX_PIPE_LOCK 0x75 +#define PACKET3_SET_SH_REG 0x76 +#define PACKET3_SET_SH_REG_START 0x00002c00 +#define PACKET3_SET_SH_REG_END 0x00003000 +#define PACKET3_SET_SH_REG_OFFSET 0x77 +#define PACKET3_SET_QUEUE_REG 0x78 +#define PACKET3_SET_UCONFIG_REG 0x79 +#define PACKET3_SET_UCONFIG_REG_START 0x0000c000 +#define PACKET3_SET_UCONFIG_REG_END 0x0000c400 +#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A +#define PACKET3_FORWARD_HEADER 0x7C +#define PACKET3_SCRATCH_RAM_WRITE 0x7D +#define PACKET3_SCRATCH_RAM_READ 0x7E +#define PACKET3_LOAD_CONST_RAM 0x80 +#define PACKET3_WRITE_CONST_RAM 0x81 +#define PACKET3_DUMP_CONST_RAM 0x83 +#define PACKET3_INCREMENT_CE_COUNTER 0x84 +#define PACKET3_INCREMENT_DE_COUNTER 0x85 +#define PACKET3_WAIT_ON_CE_COUNTER 0x86 +#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 +#define PACKET3_SWITCH_BUFFER 0x8B +#define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C +#define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C +#define PACKET3_DISPATCH_DRAW 0x8D +#define PACKET3_DISPATCH_DRAW_ACE 0x8D +#define PACKET3_GET_LOD_STATS 0x8E +#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F +#define PACKET3_FRAME_CONTROL 0x90 +# define FRAME_TMZ (1 << 0) +# define FRAME_CMD(x) ((x) << 28) + /* + * x=0: tmz_begin + * x=1: tmz_end + */ +#define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91 +#define PACKET3_WAIT_REG_MEM64 0x93 +#define PACKET3_COND_PREEMPT 0x94 +#define PACKET3_HDP_FLUSH 0x95 +#define PACKET3_COPY_DATA_RB 0x96 +#define PACKET3_INVALIDATE_TLBS 0x98 +#define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) +#define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) +#define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) +#define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29) + +#define PACKET3_AQL_PACKET 0x99 +#define PACKET3_DMA_DATA_FILL_MULTI 0x9A +#define PACKET3_SET_SH_REG_INDEX 0x9B +#define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C +#define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D +#define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E +#define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F +#define PACKET3_SET_RESOURCES 0xA0 +/* 1. header + * 2. CONTROL + * 3. QUEUE_MASK_LO [31:0] + * 4. QUEUE_MASK_HI [31:0] + * 5. GWS_MASK_LO [31:0] + * 6. GWS_MASK_HI [31:0] + * 7. OAC_MASK [15:0] + * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0] + */ +# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0) +# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16) +# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29) +#define PACKET3_MAP_PROCESS 0xA1 +#define PACKET3_MAP_QUEUES 0xA2 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. MQD_ADDR_LO [31:0] + * 5. MQD_ADDR_HI [31:0] + * 6. WPTR_ADDR_LO [31:0] + * 7. WPTR_ADDR_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8) +# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13) +# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16) +# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18) +# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21) +# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24) +# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2 */ +# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1) +# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2) +#define PACKET3_UNMAP_QUEUES 0xA3 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. CONTROL3 + * 5. CONTROL4 + * 6. CONTROL5 + */ +/* CONTROL */ +# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0) + /* 0 - PREEMPT_QUEUES + * 1 - RESET_QUEUES + * 2 - DISABLE_PROCESS_QUEUES + * 3 - PREEMPT_QUEUES_NO_UNMAP + */ +# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2a */ +# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2) +/* CONTROL3a */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2) +/* CONTROL3b */ +# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0) +/* CONTROL4 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2) +/* CONTROL5 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2) +#define PACKET3_QUERY_STATUS 0xA4 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. ADDR_LO [31:0] + * 5. ADDR_HI [31:0] + * 6. DATA_LO [31:0] + * 7. DATA_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0) +# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28) +# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30) +/* CONTROL2a */ +# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) +# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_LIST 0xA5 +#define PACKET3_MAP_PROCESS_VM 0xA6 +/* GFX11 */ +#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0 +# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0) +# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0) + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 80565392313f..73223d97a87f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -2010,7 +2010,7 @@ static int gfx_v6_0_cp_gfx_start(struct amdgpu_device *adev) r = amdgpu_ring_alloc(ring, 7 + 4); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r); return r; } amdgpu_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5)); @@ -2031,7 +2031,7 @@ static int gfx_v6_0_cp_gfx_start(struct amdgpu_device *adev) r = amdgpu_ring_alloc(ring, gfx_v6_0_get_csb_size(adev) + 10); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r); return r; } @@ -3002,7 +3002,7 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev, u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { - DRM_INFO("Not implemented\n"); + drm_info(adev_to_drm(adev), "Not implemented\n"); } static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = { @@ -3555,7 +3555,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) memset(cu_info, 0, sizeof(*cu_info)); - amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); + amdgpu_gfx_parse_disable_cu(adev, disable_masks, 4, 2); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 2b7aba22ecc1..2b691452775b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2068,23 +2068,15 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { u32 ref_and_mask; - int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1; + int usepfp; + struct amdgpu_device *adev = ring->adev; - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - switch (ring->me) { - case 1: - ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; - break; - case 2: - ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; - break; - default: - return; - } - } else { - ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; + if (!adev->gfx.funcs->get_hdp_flush_mask) { + dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__); + return; } + adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, &usepfp); amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ WAIT_REG_MEM_FUNCTION(3) | /* == */ @@ -2473,7 +2465,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev) r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r); return r; } @@ -3245,7 +3237,7 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf); return 0; } @@ -3471,7 +3463,8 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) +static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id, + struct amdgpu_ring *ring, unsigned vmid) { u32 data; @@ -4074,12 +4067,49 @@ static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev, cik_srbm_select(adev, me, pipe, q, vm); } +/** + * gfx_v7_0_get_hdp_flush_mask - get the reference and mask for HDP flush + * + * @ring: amdgpu_ring structure holding ring information + * @ref_and_mask: pointer to store the reference and mask + * @reg_mem_engine: pointer to store the register memory engine + * + * Calculates the reference and mask for HDP flush based on the ring type and me. + */ +static void gfx_v7_0_get_hdp_flush_mask(struct amdgpu_ring *ring, + uint32_t *ref_and_mask, uint32_t *reg_mem_engine) +{ + if (!ring || !ref_and_mask || !reg_mem_engine) { + DRM_INFO("%s:invalid params\n", __func__); + return; + } + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE || + ring->funcs->type == AMDGPU_RING_TYPE_KIQ) { + switch (ring->me) { + case 1: + *ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; + break; + case 2: + *ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; + break; + default: + return; + } + *reg_mem_engine = 0; + } else { + *ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; + *reg_mem_engine = 1; + } +} + static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter, .select_se_sh = &gfx_v7_0_select_se_sh, .read_wave_data = &gfx_v7_0_read_wave_data, .read_wave_sgprs = &gfx_v7_0_read_wave_sgprs, - .select_me_pipe_q = &gfx_v7_0_select_me_pipe_q + .select_me_pipe_q = &gfx_v7_0_select_me_pipe_q, + .get_hdp_flush_mask = &gfx_v7_0_get_hdp_flush_mask, }; static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { @@ -5033,7 +5063,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) memset(cu_info, 0, sizeof(*cu_info)); - amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); + amdgpu_gfx_parse_disable_cu(adev, disable_masks, 4, 2); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1c87375e1dd5..a6b4c8f41dc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1048,7 +1048,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->gfx.ce_feature_version >= 46 && adev->gfx.pfp_feature_version >= 46) { adev->virt.chained_ib_support = true; - DRM_INFO("Chained IB support enabled!\n"); + drm_info(adev_to_drm(adev), "Chained IB support enabled!\n"); } else adev->virt.chained_ib_support = false; @@ -1274,7 +1274,7 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf); return 0; } @@ -1509,7 +1509,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) r = amdgpu_ib_get(adev, NULL, total_size, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { - DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + drm_err(adev_to_drm(adev), "failed to get ib (%d).\n", r); return r; } @@ -1604,14 +1604,14 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* shedule the ib on the ring */ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) { - DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); + drm_err(adev_to_drm(adev), "ib submit failed (%d).\n", r); goto fail; } /* wait for the GPU to finish processing the IB */ r = dma_fence_wait(f, false); if (r) { - DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); + drm_err(adev_to_drm(adev), "fence wait failed (%d).\n", r); goto fail; } @@ -3820,7 +3820,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); - DRM_INFO("Timeout wait for RLC serdes %u,%u\n", + drm_info(adev_to_drm(adev), "Timeout wait for RLC serdes %u,%u\n", i, j); return; } @@ -4143,7 +4143,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r); return r; } @@ -5211,13 +5211,49 @@ static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id start + SQIND_WAVE_SGPRS_OFFSET, size, dst); } +/** + * gfx_v8_0_get_hdp_flush_mask - get the reference and mask for HDP flush + * + * @ring: amdgpu_ring structure holding ring information + * @ref_and_mask: pointer to store the reference and mask + * @reg_mem_engine: pointer to store the register memory engine + * + * Calculates the reference and mask for HDP flush based on the ring type and me. + */ +static void gfx_v8_0_get_hdp_flush_mask(struct amdgpu_ring *ring, + uint32_t *ref_and_mask, uint32_t *reg_mem_engine) +{ + if (!ring || !ref_and_mask || !reg_mem_engine) { + DRM_INFO("%s:invalid params\n", __func__); + return; + } + + if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || + (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { + switch (ring->me) { + case 1: + *ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; + break; + case 2: + *ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; + break; + default: + return; + } + *reg_mem_engine = 0; + } else { + *ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; + *reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ + } +} static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, .select_se_sh = &gfx_v8_0_select_se_sh, .read_wave_data = &gfx_v8_0_read_wave_data, .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, - .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q + .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q, + .get_hdp_flush_mask = &gfx_v8_0_get_hdp_flush_mask, }; static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block) @@ -5541,7 +5577,8 @@ static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) } } -static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) +static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id, + struct amdgpu_ring *ring, unsigned vmid) { u32 data; @@ -5999,25 +6036,14 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { u32 ref_and_mask, reg_mem_engine; + struct amdgpu_device *adev = ring->adev; - if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || - (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { - switch (ring->me) { - case 1: - ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; - break; - case 2: - ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; - break; - default: - return; - } - reg_mem_engine = 0; - } else { - ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; - reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ + if (!adev->gfx.funcs->get_hdp_flush_mask) { + dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__); + return; } + adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, ®_mem_engine); amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ WAIT_REG_MEM_FUNCTION(3) | /* == */ @@ -6643,7 +6669,7 @@ static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data, switch (enc) { case 0: - DRM_INFO("SQ general purpose intr detected:" + drm_info(adev_to_drm(adev), "SQ general purpose intr detected:" "se_id %d, immed_overflow %d, host_reg_overflow %d," "host_cmd_overflow %d, cmd_timestamp %d," "reg_timestamp %d, thread_trace_buff_full %d," @@ -6685,7 +6711,7 @@ static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data, else sprintf(type, "EDC/ECC error"); - DRM_INFO( + drm_info(adev_to_drm(adev), "SQ %s detected: " "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d " "trap %s, sq_ed_info.source %s.\n", @@ -7058,7 +7084,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) else ao_cu_num = adev->gfx.config.max_cu_per_sh; - amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); + amdgpu_gfx_parse_disable_cu(adev, disable_masks, 4, 2); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0148d7ff34d9..7e9d753f4a80 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1301,7 +1301,8 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) (adev->gfx.mec_feature_version < 46) || (adev->gfx.pfp_fw_version < 0x000000b7) || (adev->gfx.pfp_feature_version < 46))) - DRM_WARN_ONCE("CP firmware version too old, please update!"); + drm_warn_once(adev_to_drm(adev), + "CP firmware version too old, please update!"); switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): @@ -2004,6 +2005,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, + .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask, }; const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = { @@ -2039,7 +2041,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; - DRM_INFO("fix gfx.config for vega12\n"); + drm_info(adev_to_drm(adev), "fix gfx.config for vega12\n"); break; case IP_VERSION(9, 4, 0): adev->gfx.ras = &gfx_v9_0_ras; @@ -2409,8 +2411,10 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); adev->gfx.compute_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); - if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset) + if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset) { adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + } r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); if (r) { @@ -2727,7 +2731,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); - DRM_INFO("Timeout wait for RLC serdes %u,%u\n", + drm_info(adev_to_drm(adev), "Timeout wait for RLC serdes %u,%u\n", i, j); return; } @@ -3150,7 +3154,7 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) /* RLC_GPM_GENERAL_6 : RLC Ucode version */ rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); if(rlc_ucode_ver == 0x108) { - DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", + drm_info(adev_to_drm(adev), "Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i\n", rlc_ucode_ver, adev->gfx.rlc_fw_version); /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, * default is 0x9C4 to create a 100us interval */ @@ -3333,12 +3337,12 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) */ if (adev->flags & AMD_IS_APU && adev->in_s3 && !pm_resume_via_firmware()) { - DRM_INFO("Will skip the CSB packet resubmit\n"); + drm_info(adev_to_drm(adev), "Will skip the CSB packet resubmit\n"); return 0; } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r); return r; } @@ -4581,7 +4585,7 @@ static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) r = amdgpu_ring_alloc(ring, 7); if (r) { - DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", + drm_err(adev_to_drm(adev), "GDS workarounds failed to lock ring %s (%d).\n", ring->name, r); return r; } @@ -4670,7 +4674,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) r = amdgpu_ib_get(adev, NULL, total_size, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { - DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + drm_err(adev_to_drm(adev), "failed to get ib (%d).\n", r); return r; } @@ -4771,14 +4775,14 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* shedule the ib on the ring */ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) { - DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); + drm_err(adev_to_drm(adev), "ib schedule failed (%d).\n", r); goto fail; } /* wait for the GPU to finish processing the IB */ r = dma_fence_wait(f, false); if (r) { - DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); + drm_err(adev_to_drm(adev), "fence wait failed (%d).\n", r); goto fail; } @@ -5171,7 +5175,8 @@ static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); } -static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) +static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id, + struct amdgpu_ring *ring, unsigned int vmid) { amdgpu_gfx_off_ctrl(adev, false); @@ -5379,25 +5384,13 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - switch (ring->me) { - case 1: - ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; - break; - case 2: - ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; - break; - default: - return; - } - reg_mem_engine = 0; - } else { - ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; - reg_mem_engine = 1; /* pfp */ + if (!adev->gfx.funcs->get_hdp_flush_mask) { + dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__); + return; } + adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, ®_mem_engine); gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, adev->nbio.funcs->get_hdp_flush_req_offset(adev), adev->nbio.funcs->get_hdp_flush_done_offset(adev), @@ -5581,15 +5574,42 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, 0); } -static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +static void gfx_v9_0_ring_emit_event_write(struct amdgpu_ring *ring, + uint32_t event_type, + uint32_t event_index) { - int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); - uint32_t seq = ring->fence_drv.sync_seq; - uint64_t addr = ring->fence_drv.gpu_addr; + amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); + amdgpu_ring_write(ring, EVENT_TYPE(event_type) | + EVENT_INDEX(event_index)); +} - gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, - lower_32_bits(addr), upper_32_bits(addr), - seq, 0xffffffff, 4); +static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) +{ + const unsigned int cp_coher_cntl = + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); + + /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); + amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ +} + +static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { + gfx_v9_0_ring_emit_event_write(ring, VS_PARTIAL_FLUSH, 4); + gfx_v9_0_ring_emit_event_write(ring, PS_PARTIAL_FLUSH, 4); + } + gfx_v9_0_ring_emit_event_write(ring, CS_PARTIAL_FLUSH, 4); + gfx_v9_0_emit_mem_sync(ring); } static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, @@ -5744,7 +5764,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) { r = -EINVAL; - DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); + drm_warn(adev_to_drm(adev), "ring %d timeout to preempt ib\n", ring->idx); } /*reset the CP_VMID_PREEMPT after trailing fence*/ @@ -7080,25 +7100,6 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, gfx_v9_0_query_utc_edc_status(adev, err_data); } -static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) -{ - const unsigned int cp_coher_cntl = - PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | - PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | - PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | - PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | - PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); - - /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */ - amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); - amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ - amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ - amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ - amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ - amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ - amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ -} - static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring, uint32_t pipe, bool enable) { @@ -7173,6 +7174,91 @@ static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_insert_nop(ring, num_nop - 1); } +static void gfx_v9_0_ring_emit_wreg_me(struct amdgpu_ring *ring, + uint32_t reg, + uint32_t val) +{ + uint32_t cmd = 0; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_KIQ: + cmd = (1 << 16); /* no inc addr */ + break; + default: + cmd = WR_CONFIRM; + break; + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, cmd); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + /* send the reset - 5 */ + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v9_0_ring_emit_wreg(kiq_ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); + amdgpu_ring_commit(kiq_ring); + r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 8 + 7 + 5 + 2 + 8 + 7)) + return -ENOMEM; + /* emit the fence to finish the reset - 8 */ + ring->trail_seq++; + gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, + ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC); + /* wait for the fence - 7 */ + gfx_v9_0_wait_reg_mem(ring, 0, 1, 0, + lower_32_bits(ring->trail_fence_gpu_addr), + upper_32_bits(ring->trail_fence_gpu_addr), + ring->trail_seq, 0xffffffff, 4); + /* clear mmCP_VMID_RESET - 5 */ + gfx_v9_0_ring_emit_wreg_me(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); + /* event write ENABLE_LEGACY_PIPELINE - 2 */ + gfx_v9_0_ring_emit_event_write(ring, ENABLE_LEGACY_PIPELINE, 0); + /* emit a regular fence - 8 */ + ring->trail_seq++; + gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, + ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC); + /* wait for the fence - 7 */ + gfx_v9_0_wait_reg_mem(ring, 1, 1, 0, + lower_32_bits(ring->trail_fence_gpu_addr), + upper_32_bits(ring->trail_fence_gpu_addr), + ring->trail_seq, 0xffffffff, 4); + amdgpu_ring_commit(ring); + /* wait for the commands to complete */ + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) @@ -7413,7 +7499,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .set_wptr = gfx_v9_0_ring_set_wptr_gfx, .emit_frame_size = /* totally 242 maximum if 16 IBs */ 5 + /* COND_EXEC */ - 7 + /* PIPELINE_SYNC */ + 13 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* VM_FLUSH */ @@ -7451,9 +7537,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .reset = gfx_v9_0_reset_kgq, .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; @@ -7469,7 +7555,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .set_wptr = amdgpu_sw_ring_set_wptr_gfx, .emit_frame_size = /* totally 242 maximum if 16 IBs */ 5 + /* COND_EXEC */ - 7 + /* PIPELINE_SYNC */ + 13 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* VM_FLUSH */ @@ -7530,7 +7616,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 5 + /* hdp invalidate */ - 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ + 9 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ @@ -7552,7 +7638,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_wave_limit = gfx_v9_0_emit_wave_limit, .reset = gfx_v9_0_reset_kcq, @@ -7573,7 +7658,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 5 + /* hdp invalidate */ - 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ @@ -7772,7 +7856,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, adev->gfx.config.max_sh_per_se > 16) return -EINVAL; - amdgpu_gfx_parse_disable_cu(disable_masks, + amdgpu_gfx_parse_disable_cu(adev, disable_masks, adev->gfx.config.max_shader_engines, adev->gfx.config.max_sh_per_se); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index cbb74ffc4792..ad4d442e7345 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -848,6 +848,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition, .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, .get_xccs_per_xcp = &gfx_v9_4_3_get_xccs_per_xcp, + .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask, }; static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, @@ -1455,7 +1456,7 @@ static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) { /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf); return 0; } @@ -1481,7 +1482,7 @@ static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct amdgpu_device *adev, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); - DRM_INFO("Timeout wait for RLC serdes %u,%u\n", + drm_info(adev_to_drm(adev), "Timeout wait for RLC serdes %u,%u\n", i, j); return; } @@ -1582,7 +1583,7 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) rlc_ucode_ver = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_6); if (rlc_ucode_ver == 0x108) { dev_info(adev->dev, - "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", + "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i\n", rlc_ucode_ver, adev->gfx.rlc_fw_version); /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, * default is 0x9C4 to create a 100us interval */ @@ -1666,12 +1667,12 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned vmid) +static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, + int inst, struct amdgpu_ring *ring, unsigned int vmid) { u32 reg, pre_data, data; - reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL); + reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regRLC_SPM_MC_CNTL); if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) pre_data = RREG32_NO_KIQ(reg); else @@ -1682,9 +1683,9 @@ static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu if (pre_data != data) { if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { - WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data); + WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, inst), regRLC_SPM_MC_CNTL, data); } else - WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data); + WREG32_SOC15(GC, GET_INST(GC, inst), regRLC_SPM_MC_CNTL, data); } } @@ -2818,25 +2819,13 @@ static void gfx_v9_4_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - switch (ring->me) { - case 1: - ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; - break; - case 2: - ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; - break; - default: - return; - } - reg_mem_engine = 0; - } else { - ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; - reg_mem_engine = 1; /* pfp */ + if (!adev->gfx.funcs->get_hdp_flush_mask) { + dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__); + return; } + adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, ®_mem_engine); gfx_v9_4_3_wait_reg_mem(ring, reg_mem_engine, 0, 1, adev->nbio.funcs->get_hdp_flush_req_offset(adev), adev->nbio.funcs->get_hdp_flush_done_offset(adev), @@ -4914,7 +4903,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, adev->gfx.config.max_sh_per_se > 16) return -EINVAL; - amdgpu_gfx_parse_disable_cu(disable_masks, + amdgpu_gfx_parse_disable_cu(adev, disable_masks, adev->gfx.config.max_shader_engines, adev->gfx.config.max_sh_per_se); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c new file mode 100644 index 000000000000..4aa004ee2c4d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c @@ -0,0 +1,893 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_xcp.h" +#include "gfxhub_v12_1.h" + +#include "gc/gc_12_1_0_offset.h" +#include "gc/gc_12_1_0_sh_mask.h" +#include "soc_v1_0_enum.h" + +#include "soc15_common.h" + +#define regGCVM_L2_CNTL3_DEFAULT 0x80120007 +#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1 +#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0 +#define regGRBM_GFX_INDEX_DEFAULT 0xe0000000 + + +static u64 gfxhub_v12_1_get_fb_location(struct amdgpu_device *adev) +{ + u64 base; + + base = RREG32_SOC15(GC, GET_INST(GC, 0), + regGCMC_VM_FB_LOCATION_BASE_LO32); + base &= GCMC_VM_FB_LOCATION_BASE_LO32__FB_BASE_LO32_MASK; + base <<= 24; + + base |= ((u64)(GCMC_VM_FB_LOCATION_BASE_HI32__FB_BASE_HI1_MASK & + RREG32_SOC15(GC, GET_INST(GC, 0), + regGCMC_VM_FB_LOCATION_BASE_HI32)) << 56); + return base; +} + +static u64 gfxhub_v12_1_get_mc_fb_offset(struct amdgpu_device *adev) +{ + return (u64)(RREG32_SOC15(GC, GET_INST(GC, 0), + regGCMC_VM_FB_OFFSET) << 24); +} + +static void gfxhub_v12_1_xcc_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, + uint64_t page_table_base, + uint32_t xcc_mask) +{ + struct amdgpu_vmhub *hub; + int i; + + for_each_inst(i, xcc_mask) { + hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; + WREG32_SOC15_OFFSET(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); + } +} + +static void gfxhub_v12_1_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, + uint64_t page_table_base) +{ + uint32_t xcc_mask; + + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); + gfxhub_v12_1_xcc_setup_vm_pt_regs(adev, vmid, page_table_base, + xcc_mask); +} + +static void gfxhub_v12_1_xcc_init_gart_aperture_regs(struct amdgpu_device *adev, + uint32_t xcc_mask) +{ + uint64_t pt_base; + int i; + + if (adev->gmc.pdb0_bo) + pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo); + else + pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + gfxhub_v12_1_xcc_setup_vm_pt_regs(adev, 0, pt_base, xcc_mask); + + /* If use GART for FB translation, vmid0 page table covers both + * vram and system memory (gart) + */ + for_each_inst(i, xcc_mask) { + if (adev->gmc.pdb0_bo) { + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.fb_start >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.fb_start >> 44)); + + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); + } else { + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); + } + } +} + +static void gfxhub_v12_1_xcc_init_system_aperture_regs(struct amdgpu_device *adev, + uint32_t xcc_mask) +{ + uint64_t value; + uint32_t tmp; + int i; + + for_each_inst(i, xcc_mask) { + /* Program the AGP BAR */ + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_BASE_LO32, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_BASE_HI32, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_BOT_LO32, + lower_32_bits(adev->gmc.agp_start >> 24)); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_BOT_HI32, + upper_32_bits(adev->gmc.agp_start >> 24)); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_TOP_LO32, + lower_32_bits(adev->gmc.agp_end >> 24)); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_TOP_HI32, + upper_32_bits(adev->gmc.agp_end >> 24)); + + if (!amdgpu_sriov_vf(adev)) { + /* Program the system aperture low logical page number. */ + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, + lower_32_bits(min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, + upper_32_bits(min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, + lower_32_bits(max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, + upper_32_bits(max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18)); + + /* Set default page address. */ + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2, + ENABLE_RETRY_FAULT_INTERRUPT, 0x1); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_CNTL2, tmp); + } + + /* In the case squeezing vram into GART aperture, we don't use + * FB aperture and AGP aperture. Disable them. + */ + if (adev->gmc.pdb0_bo) { + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_FB_LOCATION_TOP_LO32, 0); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_FB_LOCATION_TOP_HI32, 0); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_FB_LOCATION_BASE_LO32, + 0xFFFFFFFF); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_FB_LOCATION_BASE_HI32, 1); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_AGP_TOP_LO32, 0); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_AGP_TOP_HI32, 0); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_AGP_BOT_LO32, 0xFFFFFFFF); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_AGP_BOT_HI32, 1); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, + 0x7F); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0); + } + } +} + +static void gfxhub_v12_1_xcc_init_tlb_regs(struct amdgpu_device *adev, + uint32_t xcc_mask) +{ + uint32_t tmp; + int i; + + for_each_inst(i, xcc_mask) { + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, + GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, + GCMC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, + GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, + GCMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, + GCMC_VM_MX_L1_TLB_CNTL, + ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, + GCMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); + + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_MX_L1_TLB_CNTL, tmp); + } +} + +static void gfxhub_v12_1_xcc_init_cache_regs(struct amdgpu_device *adev, + uint32_t xcc_mask) +{ + uint32_t tmp; + int i; + + for_each_inst(i, xcc_mask) { + /* Setup L2 cache */ + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regGCVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + ENABLE_L2_CACHE, 1); + /*TODO: set ENABLE_L2_FRAGMENT_PROCESSING to 1? */ + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regGCVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, + INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, + INVALIDATE_L2_CACHE, 1); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCVM_L2_CNTL2, tmp); + + tmp = regGCVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL3, tmp); + + tmp = regGCVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, 1); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL4, tmp); + + tmp = regGCVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, + L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL5, tmp); + } +} + +static void gfxhub_v12_1_xcc_enable_system_domain(struct amdgpu_device *adev, + uint32_t xcc_mask) +{ + uint32_t tmp; + int i; + + for_each_inst(i, xcc_mask) { + tmp = RREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, + ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, + PAGE_TABLE_DEPTH, + adev->gmc.vmid0_page_table_depth); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->gmc.vmid0_page_table_block_size); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_CNTL, tmp); + } +} + +static void gfxhub_v12_1_xcc_disable_identity_aperture(struct amdgpu_device *adev, + uint32_t xcc_mask) +{ + int i; + + for_each_inst(i, xcc_mask) { + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0XFFFFFFFF); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x00001FFF); + + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); + + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + 0); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + 0); + } +} + +static void gfxhub_v12_1_xcc_setup_vmid_config(struct amdgpu_device *adev, + uint32_t xcc_mask) +{ + struct amdgpu_vmhub *hub; + unsigned int num_level, block_size; + uint32_t tmp; + int i, j; + + num_level = adev->vm_manager.num_level; + block_size = adev->vm_manager.block_size; + block_size -= 9; + + for_each_inst(j, xcc_mask) { + hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regGCVM_CONTEXT1_CNTL, + i * hub->ctx_distance); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PAGE_TABLE_DEPTH, num_level); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, block_size); + /* Send no-retry XNACK on fault to suppress VM fault storm */ + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + 1); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regGCVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } + + hub->vm_cntx_cntl = tmp; + } +} + +static void gfxhub_v12_1_xcc_program_invalidation(struct amdgpu_device *adev, + uint32_t xcc_mask) +{ + struct amdgpu_vmhub *hub; + unsigned int i, j; + + for_each_inst(j, xcc_mask) { + hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; + + for (i = 0 ; i < 18; ++i) { + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xFFFFFFFF); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x3FFF); + } + } +} + +static int gfxhub_v12_1_xcc_gart_enable(struct amdgpu_device *adev, + uint32_t xcc_mask) +{ + uint32_t i; + + if (amdgpu_sriov_vf(adev)) { + /* GCMC_VM_FB_LOCATION_BASE/TOP are VF copy registers + * VBIO post does not program them at boot up phase + * Need driver to program them from guest side */ + for_each_inst(i, xcc_mask) { + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_FB_LOCATION_BASE_LO32, + lower_32_bits(adev->gmc.vram_start >> 24)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_FB_LOCATION_BASE_HI32, + upper_32_bits(adev->gmc.vram_start >> 24)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_FB_LOCATION_TOP_LO32, + lower_32_bits(adev->gmc.vram_end >> 24)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_FB_LOCATION_TOP_HI32, + upper_32_bits(adev->gmc.vram_end >> 24)); + } + } + /* GART Enable. */ + gfxhub_v12_1_xcc_init_gart_aperture_regs(adev, xcc_mask); + gfxhub_v12_1_xcc_init_system_aperture_regs(adev, xcc_mask); + gfxhub_v12_1_xcc_init_tlb_regs(adev, xcc_mask); + if (!amdgpu_sriov_vf(adev)) + gfxhub_v12_1_xcc_init_cache_regs(adev, xcc_mask); + + gfxhub_v12_1_xcc_enable_system_domain(adev, xcc_mask); + if (!amdgpu_sriov_vf(adev)) + gfxhub_v12_1_xcc_disable_identity_aperture(adev, xcc_mask); + gfxhub_v12_1_xcc_setup_vmid_config(adev, xcc_mask); + gfxhub_v12_1_xcc_program_invalidation(adev, xcc_mask); + + return 0; +} + +static int gfxhub_v12_1_gart_enable(struct amdgpu_device *adev) +{ + uint32_t xcc_mask; + + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); + return gfxhub_v12_1_xcc_gart_enable(adev, xcc_mask); +} + +static void gfxhub_v12_1_xcc_gart_disable(struct amdgpu_device *adev, + uint32_t xcc_mask) +{ + struct amdgpu_vmhub *hub; + u32 tmp; + u32 i, j; + + for_each_inst(j, xcc_mask) { + hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regGCVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, GET_INST(GC, j), + regGCMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, j), + regGCMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + if (!amdgpu_sriov_vf(adev)) { + tmp = RREG32_SOC15(GC, GET_INST(GC, j), regGCVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, GET_INST(GC, j), regGCVM_L2_CNTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, j), regGCVM_L2_CNTL3, 0); + } + } +} + +static void gfxhub_v12_1_gart_disable(struct amdgpu_device *adev) +{ + uint32_t xcc_mask; + + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); + gfxhub_v12_1_xcc_gart_disable(adev, xcc_mask); +} + +static void gfxhub_v12_1_xcc_set_fault_enable_default(struct amdgpu_device *adev, + bool value, uint32_t xcc_mask) +{ + u32 tmp; + int i; + + for_each_inst(i, xcc_mask) { + tmp = RREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_CNTL_LO32); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + PDE3_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + CLIENT_ID_NO_RETRY_FAULT_INTERRUPT, value ? 0xFFFF:0); + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + OTHER_CLIENT_ID_NO_RETRY_FAULT_INTERRUPT, value); + if (!value) + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_LO32, + CRASH_ON_NO_RETRY_FAULT, 1); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_CNTL_LO32, tmp); + + tmp = RREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_CNTL_HI32); + if (!value) + tmp = REG_SET_FIELD(tmp, + GCVM_L2_PROTECTION_FAULT_CNTL_HI32, + CRASH_ON_RETRY_FAULT, 1); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_CNTL_HI32, tmp); + } +} + +/** + * gfxhub_v12_1_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void gfxhub_v12_1_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + uint32_t xcc_mask; + + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); + gfxhub_v12_1_xcc_set_fault_enable_default(adev, value, xcc_mask); +} + +static uint32_t gfxhub_v12_1_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + INVALIDATE_L2_PDE3, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +static const char *gfxhub_v12_1_client_ids[] = { + "CB", + "DB", + "GE1", + "GE2", + "CPF", + "CPC", + "CPG", + "RLC", + "TCP", + "SQC (inst)", + "SQC (data)", + "SQG/PC/SC", + "Reserved", + "SDMA0", + "SDMA1", + "GCR", + "Reserved", + "Reserved", + "WGS", + "DSM", + "PA" +}; + +/*TODO: l2 protection fault status is increased to 64bits. + * some critical fields like FED are moved to STATUS_HI32 */ +static void gfxhub_v12_1_print_l2_protection_fault_status(struct amdgpu_device *adev, + uint32_t status) +{ + u32 cid = REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, + CID); + + dev_err(adev->dev, + "GCVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n", + status); + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + cid >= ARRAY_SIZE(gfxhub_v12_1_client_ids) ? + "unknown" : gfxhub_v12_1_client_ids[cid], cid); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, RW)); +} + +static const struct amdgpu_vmhub_funcs gfxhub_v12_1_vmhub_funcs = { + .print_l2_protection_fault_status = gfxhub_v12_1_print_l2_protection_fault_status, + .get_invalidate_req = gfxhub_v12_1_get_invalidate_req, +}; + +static void gfxhub_v12_1_xcc_init(struct amdgpu_device *adev, uint32_t xcc_mask) +{ + struct amdgpu_vmhub *hub; + int i; + + for_each_inst(i, xcc_mask) { + hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), + regGCVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), + regGCVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), + regGCVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), + regGCVM_CONTEXT0_CNTL); + /* TODO: add a new member to accomandate additional fault status/cntl reg */ + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_STATUS_LO32); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_CNTL_LO32); + hub->ctx_distance = + regGCVM_CONTEXT1_CNTL - + regGCVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = + regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = + regGCVM_INVALIDATE_ENG1_REQ - + regGCVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = + regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + + hub->vm_cntx_cntl_vm_fault = + GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + hub->vmhub_funcs = &gfxhub_v12_1_vmhub_funcs; + } +} + +static void gfxhub_v12_1_init(struct amdgpu_device *adev) +{ + uint32_t xcc_mask; + + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); + gfxhub_v12_1_xcc_init(adev, xcc_mask); +} + +static int gfxhub_v12_1_get_xgmi_info(struct amdgpu_device *adev) +{ + u32 max_num_physical_nodes; + u32 max_physical_node_id; + u32 xgmi_lfb_cntl; + u32 max_region; + u64 seg_size; + + xgmi_lfb_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), + regGCMC_VM_XGMI_LFB_CNTL); + seg_size = REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, 0), + regGCMC_VM_XGMI_LFB_SIZE), + GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; + max_region = REG_GET_FIELD(xgmi_lfb_cntl, + GCMC_VM_XGMI_LFB_CNTL, + PF_MAX_REGION); + + max_num_physical_nodes = 8; + max_physical_node_id = 7; + + /* PF_MAX_REGION=0 means xgmi is disabled */ + if (max_region || adev->gmc.xgmi.connected_to_cpu) { + adev->gmc.xgmi.num_physical_nodes = max_region + 1; + + if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) + return -EINVAL; + + adev->gmc.xgmi.physical_node_id = + REG_GET_FIELD(xgmi_lfb_cntl, + GCMC_VM_XGMI_LFB_CNTL, + PF_LFB_REGION); + + if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) + return -EINVAL; + + adev->gmc.xgmi.node_segment_size = seg_size; + } + + return 0; +} + +const struct amdgpu_gfxhub_funcs gfxhub_v12_1_funcs = { + .get_fb_location = gfxhub_v12_1_get_fb_location, + .get_mc_fb_offset = gfxhub_v12_1_get_mc_fb_offset, + .setup_vm_pt_regs = gfxhub_v12_1_setup_vm_pt_regs, + .gart_enable = gfxhub_v12_1_gart_enable, + .gart_disable = gfxhub_v12_1_gart_disable, + .set_fault_enable_default = gfxhub_v12_1_set_fault_enable_default, + .init = gfxhub_v12_1_init, + .get_xgmi_info = gfxhub_v12_1_get_xgmi_info, +}; + +static int gfxhub_v12_1_xcp_resume(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool value; + + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) + value = false; + else + value = true; + + gfxhub_v12_1_xcc_set_fault_enable_default(adev, value, inst_mask); + + if (!amdgpu_sriov_vf(adev)) + return gfxhub_v12_1_xcc_gart_enable(adev, inst_mask); + + return 0; +} + +static int gfxhub_v12_1_xcp_suspend(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!amdgpu_sriov_vf(adev)) + gfxhub_v12_1_xcc_gart_disable(adev, inst_mask); + + return 0; +} + +struct amdgpu_xcp_ip_funcs gfxhub_v12_1_xcp_funcs = { + .suspend = &gfxhub_v12_1_xcp_suspend, + .resume = &gfxhub_v12_1_xcp_resume +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.h new file mode 100644 index 000000000000..b3f8f0aa0024 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.h @@ -0,0 +1,31 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFXHUB_V12_1_H__ +#define __GFXHUB_V12_1_H__ + +extern const struct amdgpu_gfxhub_funcs gfxhub_v12_1_funcs; + +extern struct amdgpu_xcp_ip_funcs gfxhub_v12_1_xcp_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index deb95fab02df..aceb8447feac 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -649,7 +649,7 @@ static void gfxhub_v2_1_halt(struct amdgpu_device *adev) } if (!time) - DRM_WARN("failed to wait for GRBM(EA) idle\n"); + drm_warn(adev_to_drm(adev), "failed to wait for GRBM(EA) idle\n"); } const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index ce6e04242c52..2568eeaae945 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -115,27 +115,10 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, addr |= ((u64)entry->src_data[1] & 0xf) << 44; if (retry_fault) { + int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0, + write_fault); /* Returning 1 here also prevents sending the IV to the KFD */ - - /* Process it onyl if it's the first fault for this address */ - if (entry->ih != &adev->irq.ih_soft && - amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, - entry->timestamp)) - return 1; - - /* Delegate it to a different ring if the hardware hasn't - * already done it. - */ - if (entry->ih == &adev->irq.ih) { - amdgpu_irq_delegate(adev, entry, 8); - return 1; - } - - /* Try to handle the recoverable page faults by filling page - * tables - */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, - entry->timestamp, write_fault)) + if (ret == 1) return 1; } @@ -867,7 +850,7 @@ static int gmc_v10_0_sw_init(struct amdgpu_ip_block *ip_block) r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); + drm_warn(adev_to_drm(adev), "No suitable DMA available.\n"); return r; } @@ -974,7 +957,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) if (!adev->in_s0ix) gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); - DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned int)(adev->gmc.gart_size >> 20), (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index ba59ee8e398a..6349e239a367 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -114,27 +114,10 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, addr |= ((u64)entry->src_data[1] & 0xf) << 44; if (retry_fault) { + int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0, + write_fault); /* Returning 1 here also prevents sending the IV to the KFD */ - - /* Process it only if it's the first fault for this address */ - if (entry->ih != &adev->irq.ih_soft && - amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, - entry->timestamp)) - return 1; - - /* Delegate it to a different ring if the hardware hasn't - * already done it. - */ - if (entry->ih == &adev->irq.ih) { - amdgpu_irq_delegate(adev, entry, 8); - return 1; - } - - /* Try to handle the recoverable page faults by filling page - * tables - */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, - entry->timestamp, write_fault)) + if (ret == 1) return 1; } @@ -601,6 +584,7 @@ static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev) case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): + case IP_VERSION(3, 4, 0): adev->mmhub.funcs = &mmhub_v3_3_funcs; break; default: @@ -619,6 +603,7 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs; break; default: @@ -795,6 +780,7 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): + case IP_VERSION(11, 5, 4): set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* @@ -838,7 +824,7 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); + drm_warn(adev_to_drm(adev), "No suitable DMA available.\n"); return r; } @@ -939,7 +925,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) adev->mmhub.funcs->set_fault_enable_default(adev, value); gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); - DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned int)(adev->gmc.gart_size >> 20), (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 7a9d6894e321..b9671fc39e2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -28,6 +28,7 @@ #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v12_0.h" +#include "gmc_v12_1.h" #include "athub/athub_4_1_0_sh_mask.h" #include "athub/athub_4_1_0_offset.h" #include "oss/osssys_7_0_0_offset.h" @@ -38,7 +39,9 @@ #include "soc15_common.h" #include "nbif_v6_3_1.h" #include "gfxhub_v12_0.h" +#include "gfxhub_v12_1.h" #include "mmhub_v4_1_0.h" +#include "mmhub_v4_2_0.h" #include "athub_v4_1_0.h" #include "umc_v8_14.h" @@ -107,27 +110,10 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev, hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; if (retry_fault) { + int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0, + write_fault); /* Returning 1 here also prevents sending the IV to the KFD */ - - /* Process it only if it's the first fault for this address */ - if (entry->ih != &adev->irq.ih_soft && - amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, - entry->timestamp)) - return 1; - - /* Delegate it to a different ring if the hardware hasn't - * already done it. - */ - if (entry->ih == &adev->irq.ih) { - amdgpu_irq_delegate(adev, entry, 8); - return 1; - } - - /* Try to handle the recoverable page faults by filling page - * tables - */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, - entry->timestamp, write_fault)) + if (ret == 1) return 1; } @@ -623,6 +609,9 @@ static void gmc_v12_0_set_mmhub_funcs(struct amdgpu_device *adev) case IP_VERSION(4, 1, 0): adev->mmhub.funcs = &mmhub_v4_1_0_funcs; break; + case IP_VERSION(4, 2, 0): + adev->mmhub.funcs = &mmhub_v4_2_0_funcs; + break; default: break; } @@ -635,6 +624,9 @@ static void gmc_v12_0_set_gfxhub_funcs(struct amdgpu_device *adev) case IP_VERSION(12, 0, 1): adev->gfxhub.funcs = &gfxhub_v12_0_funcs; break; + case IP_VERSION(12, 1, 0): + adev->gfxhub.funcs = &gfxhub_v12_1_funcs; + break; default: break; } @@ -644,10 +636,19 @@ static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 1, 0): + gmc_v12_1_set_gmc_funcs(adev); + gmc_v12_1_set_irq_funcs(adev); + adev->gmc.init_pte_flags = AMDGPU_PTE_IS_PTE; + break; + default: + gmc_v12_0_set_gmc_funcs(adev); + gmc_v12_0_set_irq_funcs(adev); + break; + } gmc_v12_0_set_gfxhub_funcs(adev); gmc_v12_0_set_mmhub_funcs(adev); - gmc_v12_0_set_gmc_funcs(adev); - gmc_v12_0_set_irq_funcs(adev); gmc_v12_0_set_umc_funcs(adev); adev->gmc.shared_aperture_start = 0x2000000000000000ULL; @@ -656,6 +657,7 @@ static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block) adev->gmc.private_aperture_start = 0x1000000000000000ULL; adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; + adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF; return 0; } @@ -771,6 +773,7 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = ip_block->adev; + int i; adev->mmhub.funcs->init(adev); @@ -778,12 +781,15 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) spin_lock_init(&adev->gmc.invalidate_lock); - r = amdgpu_atomfirmware_get_vram_info(adev, - &vram_width, &vram_type, &vram_vendor); - adev->gmc.vram_width = vram_width; - - adev->gmc.vram_type = vram_type; - adev->gmc.vram_vendor = vram_vendor; + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) { + gmc_v12_1_init_vram_info(adev); + } else { + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + adev->gmc.vram_width = vram_width; + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; + } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): @@ -797,6 +803,18 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) */ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); break; + case IP_VERSION(12, 1, 0): + bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), + NUM_XCC(adev->gfx.xcc_mask)); + for (i = 0; i < hweight32(adev->aid_mask); i++) + set_bit(AMDGPU_MMHUB0(i), adev->vmhubs_mask); + /* + * To fulfill 5-level page support, + * vm size is 128PetaByte (57bit), maximum size, + * block size 512 (9bit) + */ + amdgpu_vm_adjust_size(adev, 128 * 1024 * 1024, 9, 4, 57); + break; default: break; } @@ -809,13 +827,33 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, - UTCL2_1_0__SRCID__FAULT, - &adev->gmc.vm_fault); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) { + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_UTCL2, + UTCL2_1_0__SRCID__FAULT, + &adev->gmc.vm_fault); + if (r) + return r; + /* Add GCVM UTCL2 Retry fault */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_UTCL2, + UTCL2_1_0__SRCID__RETRY, + &adev->gmc.vm_fault); + if (r) + return r; + + /* Add MMVM UTCL2 Retry fault */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VMC, + VMC_1_0__SRCID__VM_RETRY, + &adev->gmc.vm_fault); + } else { + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, + UTCL2_1_0__SRCID__FAULT, + &adev->gmc.vm_fault); + } if (r) return r; - if (!amdgpu_sriov_vf(adev)) { + if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 1, 0)) && + !amdgpu_sriov_vf(adev)) { /* interrupt sent to DF. */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0, &adev->gmc.ecc_irq); @@ -827,11 +865,11 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) * Set the internal MC address mask This is the max address of the GPU's * internal address space. */ - adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ + adev->gmc.mc_mask = AMDGPU_GMC_HOLE_MASK; r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + drm_warn(adev_to_drm(adev), "No suitable DMA available.\n"); return r; } @@ -843,6 +881,14 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_gmc_get_vbios_allocations(adev); +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) { + r = amdgpu_gmc_init_mem_ranges(adev); + if (r) + return r; + } +#endif + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) @@ -858,7 +904,11 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8; + adev->vm_manager.first_kfd_vmid = + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0) ? + 3 : 8; + adev->vm_manager.first_kfd_vmid = + adev->gfx.disable_kq ? 1 : (adev->vm_manager.first_kfd_vmid); amdgpu_vm_manager_init(adev); @@ -924,9 +974,9 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; adev->mmhub.funcs->set_fault_enable_default(adev, value); - gmc_v12_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); - dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", + drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); @@ -1026,7 +1076,10 @@ static int gmc_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, if (r) return r; - return athub_v4_1_0_set_clockgating(adev, state); + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 1, 0)) + return athub_v4_1_0_set_clockgating(adev, state); + else + return 0; } static void gmc_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) @@ -1035,7 +1088,8 @@ static void gmc_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u6 adev->mmhub.funcs->get_clockgating(adev, flags); - athub_v4_1_0_get_clockgating(adev, flags); + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 1, 0)) + athub_v4_1_0_get_clockgating(adev, flags); } static int gmc_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c new file mode 100644 index 000000000000..ef6e550ce7c3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c @@ -0,0 +1,637 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "gmc_v12_1.h" +#include "soc15_common.h" +#include "soc_v1_0_enum.h" +#include "oss/osssys_7_1_0_offset.h" +#include "oss/osssys_7_1_0_sh_mask.h" +#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" + +static int gmc_v12_1_vm_fault_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + struct amdgpu_vmhub *hub; + u32 tmp, reg, i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { + hub = &adev->vmhub[j]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + + /* This works because this interrupt is only + * enabled at init/resume and disabled in + * fini/suspend, so the overall state doesn't + * change over the course of suspend/resume. + */ + if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) + continue; + + if (j >= AMDGPU_MMHUB0(0)) + tmp = RREG32_SOC15_IP(MMHUB, reg); + else + tmp = RREG32_XCC(reg, j); + + tmp &= ~hub->vm_cntx_cntl_vm_fault; + + if (j >= AMDGPU_MMHUB0(0)) + WREG32_SOC15_IP(MMHUB, reg, tmp); + else + WREG32_XCC(reg, tmp, j); + } + } + break; + case AMDGPU_IRQ_STATE_ENABLE: + for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { + hub = &adev->vmhub[j]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + + /* This works because this interrupt is only + * enabled at init/resume and disabled in + * fini/suspend, so the overall state doesn't + * change over the course of suspend/resume. + */ + if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) + continue; + + if (j >= AMDGPU_MMHUB0(0)) + tmp = RREG32_SOC15_IP(MMHUB, reg); + else + tmp = RREG32_XCC(reg, j); + + tmp |= hub->vm_cntx_cntl_vm_fault; + + if (j >= AMDGPU_MMHUB0(0)) + WREG32_SOC15_IP(MMHUB, reg, tmp); + else + WREG32_XCC(reg, tmp, j); + } + } + break; + default: + break; + } + + return 0; +} + +static int gmc_v12_1_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct amdgpu_task_info *task_info; + bool retry_fault = false, write_fault = false; + unsigned int vmhub, node_id; + struct amdgpu_vmhub *hub; + uint32_t cam_index = 0; + const char *hub_name; + int ret, xcc_id = 0; + uint32_t status = 0; + u64 addr; + + node_id = entry->node_id; + + addr = (u64)entry->src_data[0] << 12; + addr |= ((u64)entry->src_data[1] & 0x1fff) << 44; + + if (entry->src_id == UTCL2_1_0__SRCID__RETRY) { + retry_fault = true; + write_fault = !!(entry->src_data[1] & 0x200000); + } + + if (entry->client_id == SOC_V1_0_IH_CLIENTID_VMC) { + hub_name = "mmhub0"; + vmhub = AMDGPU_MMHUB0(node_id / 4); + } else { + hub_name = "gfxhub0"; + if (adev->gfx.funcs->ih_node_to_logical_xcc) { + xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, + node_id); + if (xcc_id < 0) + xcc_id = 0; + } + vmhub = xcc_id; + } + + hub = &adev->vmhub[vmhub]; + + if (retry_fault) { + if (adev->irq.retry_cam_enabled) { + /* Delegate it to a different ring if the hardware hasn't + * already done it. + */ + if (entry->ih == &adev->irq.ih) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } + + cam_index = entry->src_data[3] & 0x3ff; + + ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, + addr, entry->timestamp, write_fault); + WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); + if (ret) + return 1; + } else { + /* Process it onyl if it's the first fault for this address */ + if (entry->ih != &adev->irq.ih_soft && + amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, + entry->timestamp)) + return 1; + + /* Delegate it to a different ring if the hardware hasn't + * already done it. + */ + if (entry->ih == &adev->irq.ih) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } + + /* Try to handle the recoverable page faults by filling page + * tables + */ + if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, + addr, entry->timestamp, write_fault)) + return 1; + } + } + + if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault)) + return 1; + + if (!printk_ratelimit()) + return 0; + + dev_err(adev->dev, + "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name, + retry_fault ? "retry" : "no-retry", + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + amdgpu_vm_print_task_info(adev, task_info); + amdgpu_vm_put_task_info(task_info); + } + + dev_err(adev->dev, " in page starting at address 0x%016llx from IH client %d (%s)\n", + addr, entry->client_id, soc_v1_0_ih_clientid_name[entry->client_id]); + + if (amdgpu_sriov_vf(adev)) + return 0; + + /* + * Issue a dummy read to wait for the status register to + * be updated to avoid reading an incorrect value due to + * the new fast GRBM interface. + */ + if (entry->vmid_src == AMDGPU_GFXHUB(0)) + RREG32(hub->vm_l2_pro_fault_status); + + status = RREG32(hub->vm_l2_pro_fault_status); + + /* Only print L2 fault status if the status register could be read and + * contains useful information + */ + if (!status) + return 0; + + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub); + + hub->vmhub_funcs->print_l2_protection_fault_status(adev, status); + + return 0; +} + +static bool gmc_v12_1_get_vmid_pasid_mapping_info(struct amdgpu_device *adev, + uint8_t vmid, uint8_t inst, + uint16_t *p_pasid) +{ + uint16_t index; + + if (inst/4) + index = 0xA + inst%4; + else + index = 0x2 + inst%4; + + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), index); + + *p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff; + + return !!(*p_pasid); +} + +/* + * GART + * VMID 0 is the physical GPU addresses as used by the kernel. + * VMIDs 1-15 are used for userspace clients and are handled + * by the amdgpu vm/hsa code. + */ + +static void gmc_v12_1_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, + unsigned int vmhub, uint32_t flush_type) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; + u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); + u32 tmp; + /* Use register 17 for GART */ + const unsigned eng = 17; + unsigned int i; + unsigned char hub_ip = 0; + + hub_ip = (AMDGPU_IS_GFXHUB(vmhub)) ? + GC_HWIP : MMHUB_HWIP; + + spin_lock(&adev->gmc.invalidate_lock); + + WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip); + + /* Wait for ACK with a delay.*/ + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack + + hub->eng_distance * eng, hub_ip); + tmp &= 1 << vmid; + if (tmp) + break; + + udelay(1); + } + + /* Issue additional private vm invalidation to MMHUB */ + if (!AMDGPU_IS_GFXHUB(vmhub) && + (hub->vm_l2_bank_select_reserved_cid2) && + !amdgpu_sriov_vf(adev)) { + inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2); + /* bit 25: RSERVED_CACHE_PRIVATE_INVALIDATION */ + inv_req |= (1 << 25); + /* Issue private invalidation */ + WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req); + /* Read back to ensure invalidation is done*/ + RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2); + } + + spin_unlock(&adev->gmc.invalidate_lock); + + if (i < adev->usec_timeout) + return; + + dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n"); +} + +/** + * gmc_v12_1_flush_gpu_tlb - gart tlb flush callback + * + * @adev: amdgpu_device pointer + * @vmid: vm instance to flush + * @vmhub: which hub to flush + * @flush_type: the flush type + * + * Flush the TLB for the requested page table. + */ +static void gmc_v12_1_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) +{ + u32 inst; + + if (AMDGPU_IS_GFXHUB(vmhub) && + !adev->gfx.is_poweron) + return; + + if (vmhub >= AMDGPU_MMHUB0(0)) + inst = 0; + else + inst = vmhub; + + /* This is necessary for SRIOV as well as for GFXOFF to function + * properly under bare metal + */ + if (((adev->gfx.kiq[inst].ring.sched.ready || + adev->mes.ring[MES_PIPE_INST(inst, 0)].sched.ready) && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)))) { + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; + const unsigned eng = 17; + u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); + u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; + + amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, inst); + return; + } + + mutex_lock(&adev->mman.gtt_window_lock); + gmc_v12_1_flush_vm_hub(adev, vmid, vmhub, 0); + mutex_unlock(&adev->mman.gtt_window_lock); + return; +} + +/** + * gmc_v12_1_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * @flush_type: the flush type + * @all_hub: flush all hubs + * @inst: is used to select which instance of KIQ to use for the invalidation + * + * Flush the TLB for the requested pasid. + */ +static void gmc_v12_1_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint32_t inst) +{ + uint16_t queried; + int vmid, i; + + if (adev->enable_uni_mes && adev->mes.ring[0].sched.ready && + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x6f) { + struct mes_inv_tlbs_pasid_input input = {0}; + input.xcc_id = inst; + input.pasid = pasid; + input.flush_type = flush_type; + + /* MES will invalidate hubs for the device(including slave xcc) from master, ignore request from slave */ + if (!amdgpu_gfx_is_master_xcc(adev, inst)) + return; + + input.hub_id = AMDGPU_GFXHUB(0); + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input); + + if (all_hub) { + /* invalidate mm_hub */ + if (test_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask)) { + input.hub_id = AMDGPU_MMHUB0(0); + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input); + } + if (test_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask)) { + input.hub_id = AMDGPU_MMHUB1(0); + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input); + } + } + return; + } + + for (vmid = 1; vmid < 16; vmid++) { + bool valid; + + valid = gmc_v12_1_get_vmid_pasid_mapping_info(adev, vmid, inst, + &queried); + if (!valid || queried != pasid) + continue; + + if (all_hub) { + for_each_set_bit(i, adev->vmhubs_mask, + AMDGPU_MAX_VMHUBS) + gmc_v12_1_flush_gpu_tlb(adev, vmid, i, + flush_type); + } else { + gmc_v12_1_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(inst), + flush_type); + } + } +} + +static uint64_t gmc_v12_1_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); + unsigned eng = ring->vm_inv_eng; + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), + lower_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), + upper_32_bits(pd_addr)); + + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + + hub->eng_distance * eng, + hub->vm_inv_eng0_ack + + hub->eng_distance * eng, + req, 1 << vmid); + + return pd_addr; +} + +static void gmc_v12_1_emit_pasid_mapping(struct amdgpu_ring *ring, + unsigned vmid, unsigned pasid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg; + + if (ring->vm_hub == AMDGPU_GFXHUB(0)) + reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; + else + reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid; + + amdgpu_ring_emit_wreg(ring, reg, pasid); +} + +/* + * PTE format: + * 63 P + * 62:59 reserved + * 58 D + * 57 G + * 56 T + * 55:54 M + * 53:52 SW + * 51:48 reserved for future + * 47:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 Z + * 2 snooped + * 1 system + * 0 valid + * + * PDE format: + * 63 P + * 62:58 block fragment size + * 57 reserved + * 56 A + * 55:54 M + * 53:52 reserved + * 51:48 reserved for future + * 47:6 physical base address of PD or PTE + * 5:3 reserved + * 2 C + * 1 system + * 0 valid + */ + +static void gmc_v12_1_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) +{ + if (!(*flags & AMDGPU_PDE_PTE_GFX12) && !(*flags & AMDGPU_PTE_SYSTEM)) + *addr = adev->vm_manager.vram_base_offset + *addr - + adev->gmc.vram_start; + BUG_ON(*addr & 0xFFFF00000000003FULL); + + *flags |= AMDGPU_PTE_SNOOPED; + + if (!adev->gmc.translate_further) + return; + + if (level == AMDGPU_VM_PDB1) { + /* Set the block fragment size */ + if (!(*flags & AMDGPU_PDE_PTE_GFX12)) + *flags |= AMDGPU_PDE_BFS_GFX12(0x9); + + } else if (level == AMDGPU_VM_PDB0) { + if (*flags & AMDGPU_PDE_PTE_GFX12) + *flags &= ~AMDGPU_PDE_PTE_GFX12; + } +} + +static void gmc_v12_1_get_coherence_flags(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + uint64_t *flags) +{ + struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); + bool is_vram = bo->tbo.resource && + bo->tbo.resource->mem_type == TTM_PL_VRAM; + bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENT); + bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT; + uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0); + bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; + unsigned int mtype, mtype_local; + bool snoop = false; + bool is_local = false; + + switch (gc_ip_version) { + case IP_VERSION(12, 1, 0): + mtype_local = MTYPE_RW; + if (amdgpu_mtype_local == 1) { + DRM_INFO_ONCE("Using MTYPE_NC for local memory\n"); + mtype_local = MTYPE_NC; + } else if (amdgpu_mtype_local == 2) { + DRM_INFO_ONCE("MTYPE_CC not supported, using MTYPE_RW instead for local memory\n"); + } else { + DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); + } + + is_local = (is_vram && adev == bo_adev); + snoop = true; + if (uncached) { + mtype = MTYPE_UC; + } else if (ext_coherent) { + mtype = is_local ? mtype_local : MTYPE_UC; + } else { + if (is_local) + mtype = mtype_local; + else + mtype = MTYPE_NC; + } + break; + default: + if (uncached || coherent) + mtype = MTYPE_UC; + else + mtype = MTYPE_NC; + } + + if (mtype != MTYPE_NC) + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, mtype); + + if (is_local || adev->have_atomics_support) + *flags |= AMDGPU_PTE_BUS_ATOMICS; + + *flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; +} + +static void gmc_v12_1_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, + uint64_t *flags) +{ + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) + *flags |= AMDGPU_PTE_EXECUTABLE; + else + *flags &= ~AMDGPU_PTE_EXECUTABLE; + + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_NC: + default: + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_RW: + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_RW); + break; + case AMDGPU_VM_MTYPE_UC: + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); + break; + } + + if ((*flags & AMDGPU_PTE_VALID) && bo) + gmc_v12_1_get_coherence_flags(adev, bo, flags); +} + +static const struct amdgpu_gmc_funcs gmc_v12_1_gmc_funcs = { + .flush_gpu_tlb = gmc_v12_1_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v12_1_flush_gpu_tlb_pasid, + .emit_flush_gpu_tlb = gmc_v12_1_emit_flush_gpu_tlb, + .emit_pasid_mapping = gmc_v12_1_emit_pasid_mapping, + .get_vm_pde = gmc_v12_1_get_vm_pde, + .get_vm_pte = gmc_v12_1_get_vm_pte, + .query_mem_partition_mode = &amdgpu_gmc_query_memory_partition, + .request_mem_partition_mode = &amdgpu_gmc_request_memory_partition, +}; + +void gmc_v12_1_set_gmc_funcs(struct amdgpu_device *adev) +{ + adev->gmc.gmc_funcs = &gmc_v12_1_gmc_funcs; +} + +static const struct amdgpu_irq_src_funcs gmc_v12_1_irq_funcs = { + .set = gmc_v12_1_vm_fault_interrupt_state, + .process = gmc_v12_1_process_interrupt, +}; + +void gmc_v12_1_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gmc.vm_fault.num_types = 1; + adev->gmc.vm_fault.funcs = &gmc_v12_1_irq_funcs; +} + +void gmc_v12_1_init_vram_info(struct amdgpu_device *adev) +{ + /* TODO: query vram_info from ip discovery binary */ + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM4; + adev->gmc.vram_width = 384 * 64; +} diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h new file mode 100644 index 000000000000..2f08f4b8cd99 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h @@ -0,0 +1,30 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GMC_V12_1_H__ +#define __GMC_V12_1_H__ + +void gmc_v12_1_set_gmc_funcs(struct amdgpu_device *adev); +void gmc_v12_1_set_irq_funcs(struct amdgpu_device *adev); +void gmc_v12_1_init_vram_info(struct amdgpu_device *adev); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index a8ec95f42926..886bf77309a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -555,7 +555,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) gmc_v6_0_set_fault_enable_default(adev, true); gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0); - dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", + drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned int)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index fbd0bf147f50..b2e87d3aa203 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -705,7 +705,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) } gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0); - DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned int)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 6551b60f2584..1d5bd90ac57f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -560,7 +560,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) tmp = RREG32(mmCONFIG_MEMSIZE); /* some boards may have garbage in the upper 16 bits */ if (tmp & 0xffff0000) { - DRM_INFO("Probable bad vram size: 0x%08x\n", tmp); + drm_info(adev_to_drm(adev), "Probably bad vram size: 0x%08x\n", tmp); if (tmp & 0xffff) tmp &= 0xffff; } @@ -939,7 +939,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) gmc_v8_0_set_fault_enable_default(adev, true); gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0); - DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned int)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8ad7519f7b58..e35ed0cc2ec6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -583,44 +583,13 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, hub = &adev->vmhub[vmhub]; if (retry_fault) { - if (adev->irq.retry_cam_enabled) { - /* Delegate it to a different ring if the hardware hasn't - * already done it. - */ - if (entry->ih == &adev->irq.ih) { - amdgpu_irq_delegate(adev, entry, 8); - return 1; - } - - cam_index = entry->src_data[2] & 0x3ff; + cam_index = entry->src_data[2] & 0x3ff; - ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, entry->timestamp, write_fault); - WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); - if (ret) - return 1; - } else { - /* Process it onyl if it's the first fault for this address */ - if (entry->ih != &adev->irq.ih_soft && - amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, - entry->timestamp)) - return 1; - - /* Delegate it to a different ring if the hardware hasn't - * already done it. - */ - if (entry->ih == &adev->irq.ih) { - amdgpu_irq_delegate(adev, entry, 8); - return 1; - } - - /* Try to handle the recoverable page faults by filling page - * tables - */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, entry->timestamp, write_fault)) - return 1; - } + ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, cam_index, node_id, + write_fault); + /* Returning 1 here also prevents sending the IV to the KFD */ + if (ret == 1) + return 1; } if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault)) @@ -1168,13 +1137,13 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, */ mtype_local = MTYPE_RW; if (amdgpu_mtype_local == 1) { - DRM_INFO_ONCE("Using MTYPE_NC for local memory\n"); + drm_info_once(adev_to_drm(adev), "Using MTYPE_NC for local memory\n"); mtype_local = MTYPE_NC; } else if (amdgpu_mtype_local == 2) { - DRM_INFO_ONCE("Using MTYPE_CC for local memory\n"); + drm_info_once(adev_to_drm(adev), "Using MTYPE_CC for local memory\n"); mtype_local = MTYPE_CC; } else { - DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); + drm_info_once(adev_to_drm(adev), "Using MTYPE_RW for local memory\n"); } is_local = (!is_vram && (adev->flags & AMD_IS_APU) && num_possible_nodes() <= 1) || @@ -1235,16 +1204,16 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC); break; case AMDGPU_VM_MTYPE_WC: - *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC); + *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC); break; case AMDGPU_VM_MTYPE_RW: - *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW); + *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW); break; case AMDGPU_VM_MTYPE_CC: - *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC); + *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC); break; case AMDGPU_VM_MTYPE_UC: - *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC); + *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC); break; } @@ -2006,7 +1975,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) 44; r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits)); if (r) { - dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); + drm_warn(adev_to_drm(adev), "No suitable DMA available.\n"); return r; } adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits); @@ -2162,12 +2131,12 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - DRM_INFO("PCIE GART of %uM enabled.\n", + drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled.\n", (unsigned int)(adev->gmc.gart_size >> 20)); if (adev->gmc.pdb0_bo) - DRM_INFO("PDB0 located at 0x%016llX\n", + drm_info(adev_to_drm(adev), "PDB0 located at 0x%016llX\n", (unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo)); - DRM_INFO("PTB located at 0x%016llX\n", + drm_info(adev_to_drm(adev), "PTB located at 0x%016llX\n", (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c index b32ea4129c61..451828bf583e 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -279,6 +279,16 @@ static int ih_v7_0_enable_ring(struct amdgpu_device *adev, return 0; } +static uint32_t ih_v7_0_setup_retry_doorbell(u32 doorbell_index) +{ + u32 val = 0; + + val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, OFFSET, doorbell_index); + val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 1); + + return val; +} + /** * ih_v7_0_irq_init - init and enable the interrupt ring * @@ -363,6 +373,21 @@ static int ih_v7_0_irq_init(struct amdgpu_device *adev) pci_set_master(adev->pdev); + if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(7, 1, 0)) { + /* Allocate the doorbell for IH Retry CAM */ + adev->irq.retry_cam_doorbell_index = (adev->doorbell_index.ih + 2) << 1; + WREG32_SOC15(OSSSYS, 0, regIH_DOORBELL_RETRY_CAM, + ih_v7_0_setup_retry_doorbell(adev->irq.retry_cam_doorbell_index)); + + /* Enable IH Retry CAM */ + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RETRY_INT_CAM_CNTL); + tmp = REG_SET_FIELD(tmp, IH_RETRY_INT_CAM_CNTL, ENABLE, 1); + tmp = REG_SET_FIELD(tmp, IH_RETRY_INT_CAM_CNTL, CAM_SIZE, 0xF); + WREG32_SOC15(OSSSYS, 0, regIH_RETRY_INT_CAM_CNTL, tmp); + + adev->irq.retry_cam_enabled = true; + } + /* enable interrupts */ ret = ih_v7_0_toggle_interrupts(adev, true); if (ret) @@ -542,6 +567,7 @@ static int ih_v7_0_sw_init(struct amdgpu_ip_block *ip_block) int r; struct amdgpu_device *adev = ip_block->adev; bool use_bus_addr; + unsigned int sw_ring_size; r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0, &adev->irq.self_irq); @@ -573,7 +599,9 @@ static int ih_v7_0_sw_init(struct amdgpu_ip_block *ip_block) /* initialize ih control register offset */ ih_v7_0_init_register_offset(adev); - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + sw_ring_size = (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(7, 1, 0)) ? + IH_SW_RING_SIZE : PAGE_SIZE; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, sw_ring_size, true); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index cc626036ed9c..46d25d55ebbe 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -41,6 +41,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_3_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_4_imu.bin"); static int imu_v11_0_init_microcode(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_1.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_1.c new file mode 100644 index 000000000000..539785a51f69 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_1.c @@ -0,0 +1,166 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_imu.h" +#include "amdgpu_dpm.h" + +#include "imu_v12_1.h" + +#include "gc/gc_12_1_0_offset.h" +#include "gc/gc_12_1_0_sh_mask.h" +#include "mmhub/mmhub_4_2_0_offset.h" + +MODULE_FIRMWARE("amdgpu/gc_12_1_0_imu.bin"); + +#define TRANSFER_RAM_MASK 0x001c0000 + +static int imu_v12_1_init_microcode(struct amdgpu_device *adev) +{ + char ucode_prefix[15]; + int err; + const struct imu_firmware_header_v1_0 *imu_hdr; + struct amdgpu_firmware_info *info = NULL; + + DRM_DEBUG("\n"); + + amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); + if (err) + goto out; + + imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data; + adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_I]; + info->ucode_id = AMDGPU_UCODE_ID_IMU_I; + info->fw = adev->gfx.imu_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes), PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_D]; + info->ucode_id = AMDGPU_UCODE_ID_IMU_D; + info->fw = adev->gfx.imu_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE); + } + +out: + if (err) { + dev_err(adev->dev, + "gfx12: Failed to load firmware \"%s_imu.bin\"\n", + ucode_prefix); + amdgpu_ucode_release(&adev->gfx.imu_fw); + } + + return err; +} + +static void imu_v12_1_xcc_load_microcode(struct amdgpu_device *adev, + int xcc_id) +{ + const struct imu_firmware_header_v1_0 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data; + fw_data = (const __le32 *)(adev->gfx.imu_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(hdr->imu_iram_ucode_size_bytes) / 4; + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGFX_IMU_I_RAM_ADDR, 0); + + for (i = 0; i < fw_size; i++) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regGFX_IMU_I_RAM_DATA, + le32_to_cpup(fw_data++)); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regGFX_IMU_I_RAM_ADDR, + adev->gfx.imu_fw_version); + + fw_data = (const __le32 *)(adev->gfx.imu_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + + le32_to_cpu(hdr->imu_iram_ucode_size_bytes)); + fw_size = le32_to_cpu(hdr->imu_dram_ucode_size_bytes) / 4; + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGFX_IMU_D_RAM_ADDR, 0); + + for (i = 0; i < fw_size; i++) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regGFX_IMU_D_RAM_DATA, + le32_to_cpup(fw_data++)); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regGFX_IMU_D_RAM_ADDR, + adev->gfx.imu_fw_version); +} + +static int imu_v12_1_load_microcode(struct amdgpu_device *adev) +{ + int i, num_xcc; + + if (!adev->gfx.imu_fw) + return -EINVAL; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + imu_v12_1_xcc_load_microcode(adev, i); + } + + return 0; +} + +static int imu_v12_1_switch_compute_partition(struct amdgpu_device *adev, + int num_xccs_per_xcp, + int compute_partition_mode) +{ + int ret; + + if (adev->psp.funcs) { + /*TODO: revisit asp interface once it's avaialble */ + ret = psp_spatial_partition(&adev->psp, + NUM_XCC(adev->gfx.xcc_mask) / + num_xccs_per_xcp); + if (ret) + return ret; + } + + adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp; + + return 0; +} + +static void imu_v12_1_init_mcm_addr_lut(struct amdgpu_device *adev) +{ + /* todo: fill in when interface is ready */ +} + +const struct amdgpu_imu_funcs gfx_v12_1_imu_funcs = { + .init_microcode = imu_v12_1_init_microcode, + .load_microcode = imu_v12_1_load_microcode, + .switch_compute_partition = imu_v12_1_switch_compute_partition, + .init_mcm_addr_lut = imu_v12_1_init_mcm_addr_lut, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_1.h b/drivers/gpu/drm/amd/amdgpu/imu_v12_1.h new file mode 100644 index 000000000000..4a7c12bf7b0f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_1.h @@ -0,0 +1,30 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __IMU_V12_1_H__ +#define __IMU_V12_1_H__ + +extern const struct amdgpu_imu_funcs gfx_v12_1_imu_funcs; + +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index 4258d3e0b706..0002bcc6c4ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -26,6 +26,7 @@ */ #include <linux/gpio/machine.h> +#include <linux/pm_runtime.h> #include "amdgpu.h" #include "isp_v4_1_1.h" @@ -145,6 +146,9 @@ static int isp_genpd_add_device(struct device *dev, void *data) return -ENODEV; } + /* The devices will be managed by the pm ops from the parent */ + dev_pm_syscore_device(dev, true); + exit: /* Continue to add */ return 0; @@ -177,12 +181,47 @@ static int isp_genpd_remove_device(struct device *dev, void *data) drm_err(&adev->ddev, "Failed to remove dev from genpd %d\n", ret); return -ENODEV; } + dev_pm_syscore_device(dev, false); exit: /* Continue to remove */ return 0; } +static int isp_suspend_device(struct device *dev, void *data) +{ + return pm_runtime_force_suspend(dev); +} + +static int isp_resume_device(struct device *dev, void *data) +{ + return pm_runtime_force_resume(dev); +} + +static int isp_v4_1_1_hw_suspend(struct amdgpu_isp *isp) +{ + int r; + + r = device_for_each_child(isp->parent, NULL, + isp_suspend_device); + if (r) + dev_err(isp->parent, "failed to suspend hw devices (%d)\n", r); + + return r; +} + +static int isp_v4_1_1_hw_resume(struct amdgpu_isp *isp) +{ + int r; + + r = device_for_each_child(isp->parent, NULL, + isp_resume_device); + if (r) + dev_err(isp->parent, "failed to resume hw device (%d)\n", r); + + return r; +} + static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) { const struct software_node *amd_camera_node, *isp4_node; @@ -369,6 +408,8 @@ static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp) static const struct isp_funcs isp_v4_1_1_funcs = { .hw_init = isp_v4_1_1_hw_init, .hw_fini = isp_v4_1_1_hw_fini, + .hw_suspend = isp_v4_1_1_hw_suspend, + .hw_resume = isp_v4_1_1_hw_resume, }; void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 27c76bd424cf..9fe8d10ab270 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -240,7 +240,7 @@ static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev) UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); if (r) { - DRM_ERROR("amdgpu: JPEG disable power gating failed\n"); + drm_err(adev_to_drm(adev), "failed to disable JPEG power gating\n"); return r; } } @@ -271,7 +271,7 @@ static int jpeg_v2_0_enable_power_gating(struct amdgpu_device *adev) UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); if (r) { - DRM_ERROR("amdgpu: JPEG enable power gating failed\n"); + drm_err(adev_to_drm(adev), "failed to enable JPEG power gating\n"); return r; } } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index d1a011c40ba2..98f5e0622bc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -298,7 +298,7 @@ static int jpeg_v3_0_disable_static_power_gating(struct amdgpu_device *adev) UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); if (r) { - DRM_ERROR("amdgpu: JPEG disable power gating failed\n"); + drm_err(adev_to_drm(adev), "failed to disable JPEG power gating\n"); return r; } } @@ -333,7 +333,7 @@ static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device *adev) UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); if (r) { - DRM_ERROR("amdgpu: JPEG enable power gating failed\n"); + drm_err(adev_to_drm(adev), "failed to enable JPEG power gating\n"); return r; } } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 33db2c1ae6cc..0bd83820dd20 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -335,7 +335,7 @@ static int jpeg_v4_0_disable_static_power_gating(struct amdgpu_device *adev) UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); if (r) { - DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG disable power gating failed\n"); + drm_err(adev_to_drm(adev), "failed to disable JPEG power gating\n"); return r; } } @@ -370,7 +370,7 @@ static int jpeg_v4_0_enable_static_power_gating(struct amdgpu_device *adev) UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); if (r) { - DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG enable power gating failed\n"); + drm_err(adev_to_drm(adev), "failed to enable JPEG power gating\n"); return r; } } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c new file mode 100644 index 000000000000..1821dced936f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c @@ -0,0 +1,714 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v2_0.h" +#include "jpeg_v4_0_3.h" + +#include "vcn/vcn_5_3_0_offset.h" +#include "vcn/vcn_5_3_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" +#include "jpeg_v5_3_0.h" + +static void jpeg_v5_3_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v5_3_0_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v5_3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state); + + +/** + * jpeg_v5_3_0_early_init - set function pointers + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Set ring and irq function pointers + */ +static int jpeg_v5_3_0_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + adev->jpeg.num_jpeg_inst = 1; + adev->jpeg.num_jpeg_rings = 1; + + jpeg_v5_3_0_set_dec_ring_funcs(adev); + jpeg_v5_3_0_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v5_3_0_sw_init - sw init for JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Load firmware and sw initialization + */ +static int jpeg_v5_3_0_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int r; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_5_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + if (r) + return r; + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + ring = adev->jpeg.inst->ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + ring->vm_hub = AMDGPU_MMHUB0(0); + + sprintf(ring->name, "jpeg_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + + /* TODO: Add queue reset mask when FW fully supports it */ + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + if (r) + return r; + return 0; +} + +/** + * jpeg_v5_3_0_sw_fini - sw fini for JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v5_3_0_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + amdgpu_jpeg_sysfs_reset_mask_fini(adev); + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v5_3_0_hw_init - start and test JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + */ +static int jpeg_v5_3_0_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; + int r; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + /* Skip ring test because pause DPG is not implemented. */ + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) + return 0; + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + + return 0; +} + +/** + * jpeg_v5_3_0_hw_fini - stop the hardware block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v5_3_0_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + cancel_delayed_work_sync(&adev->jpeg.idle_work); + + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS)) + jpeg_v5_3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); + + return 0; +} + +/** + * jpeg_v5_3_0_suspend - suspend JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * HW fini and suspend JPEG block + */ +static int jpeg_v5_3_0_suspend(struct amdgpu_ip_block *ip_block) +{ + int r; + + r = jpeg_v5_3_0_hw_fini(ip_block); + if (r) + return r; + + r = amdgpu_jpeg_suspend(ip_block->adev); + + return r; +} + +/** + * jpeg_v5_3_0_resume - resume JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v5_3_0_resume(struct amdgpu_ip_block *ip_block) +{ + int r; + + r = amdgpu_jpeg_resume(ip_block->adev); + if (r) + return r; + + r = jpeg_v5_3_0_hw_init(ip_block); + + return r; +} + +static void jpeg_v5_3_0_disable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + data &= ~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK + | JPEG_CGC_CTRL__JPEG_ENC_MODE_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); +} + +static void jpeg_v5_3_0_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + + data |= 1 << JPEG_CGC_CTRL__JPEG0_DEC_MODE__SHIFT; + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK + |JPEG_CGC_GATE__JPEG_ENC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); +} + +static int jpeg_v5_3_0_disable_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + data = 1 << UVD_IPX_DLDO_CONFIG_ONO1__ONO1_PWR_CONFIG__SHIFT; + WREG32_SOC15(JPEG, 0, regUVD_IPX_DLDO_CONFIG_ONO1, data); + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + return 0; +} + +static int jpeg_v5_3_0_enable_power_gating(struct amdgpu_device *adev) +{ + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG_ONO1), + 2 << UVD_IPX_DLDO_CONFIG_ONO1__ONO1_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + } + + return 0; +} + +static void jpeg_engine_5_0_0_dpg_clock_gating_mode(struct amdgpu_device *adev, + int inst_idx, uint8_t indirect) +{ + uint32_t data = 0; + + // JPEG disable CGC + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data = 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data = 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + + if (indirect) { + ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_CTRL, data, indirect); + + // Turn on All JPEG clocks + data = 0; + ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_GATE, data, indirect); + } else { + WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_CTRL, data, indirect); + + // Turn on All JPEG clocks + data = 0; + WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_GATE, data, indirect); + } +} + +/** + * jpeg_v5_3_0_start_dpg_mode - Jpeg start with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Start JPEG block with dpg mode + */ +static int jpeg_v5_3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec; + uint32_t reg_data = 0; + + jpeg_v5_3_0_enable_power_gating(adev); + + // enable dynamic power gating mode + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); + + if (indirect) + adev->jpeg.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr; + + jpeg_engine_5_0_0_dpg_clock_gating_mode(adev, inst_idx, indirect); + + /* MJPEG global tiling registers */ + if (indirect) + ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, indirect); + else + WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 1); + + /* enable System Interrupt for JRBC */ + if (indirect) + ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_SYS_INT_EN, + JPEG_SYS_INT_EN__DJRBC0_MASK, indirect); + else + WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_SYS_INT_EN, + JPEG_SYS_INT_EN__DJRBC0_MASK, 1); + + if (indirect) { + /* add nop to workaround PSP size check */ + ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipUVD_NO_OP, 0, indirect); + + amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0); + } + + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v5_3_0_stop_dpg_mode - Jpeg stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop JPEG block with dpg mode + */ +static void jpeg_v5_3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t reg_data = 0; + + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); +} + +/** + * jpeg_v5_3_0_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v5_3_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; + int r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, true); + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + r = jpeg_v5_3_0_start_dpg_mode(adev, 0, adev->jpeg.indirect_sram); + return r; + } + + /* disable power gating */ + r = jpeg_v5_3_0_disable_power_gating(adev); + if (r) + return r; + + /* JPEG disable CGC */ + jpeg_v5_3_0_disable_clock_gating(adev); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC0_MASK, + ~JPEG_SYS_INT_EN__DJRBC0_MASK); + + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v5_3_0_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v5_3_0_stop(struct amdgpu_device *adev) +{ + int r; + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + jpeg_v5_3_0_stop_dpg_mode(adev, 0); + } else { + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v5_3_0_enable_clock_gating(adev); + + /* enable power gating */ + r = jpeg_v5_3_0_enable_power_gating(adev); + if (r) + return r; + } + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, false); + + return 0; +} + +/** + * jpeg_v5_3_0_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v5_3_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v5_3_0_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v5_3_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v5_3_0_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v5_3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static bool jpeg_v5_3_0_is_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret = 1; + + ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS) & + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + + return ret; +} + +static int jpeg_v5_3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS, + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); +} + +static int jpeg_v5_3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + + if (enable) { + if (!jpeg_v5_3_0_is_idle(ip_block)) + return -EBUSY; + jpeg_v5_3_0_enable_clock_gating(adev); + } else { + jpeg_v5_3_0_disable_clock_gating(adev); + } + + return 0; +} + +static int jpeg_v5_3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v5_3_0_stop(adev); + else + ret = jpeg_v5_3_0_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v5_3_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v5_3_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: JPEG TRAP\n"); + + switch (entry->src_id) { + case VCN_5_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(adev->jpeg.inst->ring_dec); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static int jpeg_v5_3_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v5_3_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v5_3_0_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + +static const struct amd_ip_funcs jpeg_v5_3_0_ip_funcs = { + .name = "jpeg_v5_3_0", + .early_init = jpeg_v5_3_0_early_init, + .sw_init = jpeg_v5_3_0_sw_init, + .sw_fini = jpeg_v5_3_0_sw_fini, + .hw_init = jpeg_v5_3_0_hw_init, + .hw_fini = jpeg_v5_3_0_hw_fini, + .suspend = jpeg_v5_3_0_suspend, + .resume = jpeg_v5_3_0_resume, + .is_idle = jpeg_v5_3_0_is_idle, + .wait_for_idle = jpeg_v5_3_0_wait_for_idle, + .set_clockgating_state = jpeg_v5_3_0_set_clockgating_state, + .set_powergating_state = jpeg_v5_3_0_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v5_3_0_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .get_rptr = jpeg_v5_3_0_dec_ring_get_rptr, + .get_wptr = jpeg_v5_3_0_dec_ring_get_wptr, + .set_wptr = jpeg_v5_3_0_dec_ring_set_wptr, + .parse_cs = amdgpu_jpeg_dec_parse_cs, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v5_3_0_dec_ring_emit_vm_flush */ + 22 + 22 + /* jpeg_v5_3_0_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v5_3_0_dec_ring_emit_ib */ + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v4_0_3_dec_ring_nop, + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v5_3_0_ring_reset, +}; + +static void jpeg_v5_3_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->ring_dec->funcs = &jpeg_v5_3_0_dec_ring_vm_funcs; +} + +static const struct amdgpu_irq_src_funcs jpeg_v5_3_0_irq_funcs = { + .set = jpeg_v5_3_0_set_interrupt_state, + .process = jpeg_v5_3_0_process_interrupt, +}; + +static void jpeg_v5_3_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->irq.num_types = 1; + adev->jpeg.inst->irq.funcs = &jpeg_v5_3_0_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v5_3_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 5, + .minor = 3, + .rev = 0, + .funcs = &jpeg_v5_3_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.h new file mode 100644 index 000000000000..c1e7537d0f18 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.h @@ -0,0 +1,35 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V5_3_0_H__ +#define __JPEG_V5_3_0_H__ + +#define vcnipJPEG_CGC_GATE 0x4160 +#define vcnipJPEG_CGC_CTRL 0x4161 +#define vcnipJPEG_SYS_INT_EN 0x4141 +#define vcnipUVD_NO_OP 0x0029 +#define vcnipJPEG_DEC_GFX10_ADDR_CONFIG 0x404A + +extern const struct amdgpu_ip_block_version jpeg_v5_3_0_ip_block; + +#endif /* __JPEG_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 64cae89357b6..f2309d72bbe6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -59,7 +59,8 @@ err_reserve_bo_failed: } static int -mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, +mes_userq_create_wptr_mapping(struct amdgpu_device *adev, + struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, uint64_t wptr) { @@ -93,8 +94,28 @@ mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, return ret; } - queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); + ret = amdgpu_bo_reserve(wptr_obj->obj, true); + if (ret) { + DRM_ERROR("Failed to reserve wptr bo\n"); + return ret; + } + + /* TODO use eviction fence instead of pinning. */ + ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT); + if (ret) { + drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin wptr bo\n"); + goto unresv_bo; + } + + queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj); + amdgpu_bo_unreserve(wptr_obj->obj); + return 0; + +unresv_bo: + amdgpu_bo_unreserve(wptr_obj->obj); + return ret; + } static int convert_to_mes_priority(int priority) @@ -112,9 +133,9 @@ static int convert_to_mes_priority(int priority) } } -static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static int mes_userq_map(struct amdgpu_usermode_queue *queue) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; struct amdgpu_userq_obj *ctx = &queue->fw_obj; struct amdgpu_mqd_prop *userq_props = queue->userq_prop; @@ -157,9 +178,9 @@ static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, return 0; } -static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static int mes_userq_unmap(struct amdgpu_usermode_queue *queue) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; struct mes_remove_queue_input queue_input; struct amdgpu_userq_obj *ctx = &queue->fw_obj; @@ -223,7 +244,7 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev, amdgpu_mes_lock(&adev->mes); r = amdgpu_mes_detect_and_reset_hung_queues(adev, queue_type, false, - &hung_db_num, db_array); + &hung_db_num, db_array, 0); amdgpu_mes_unlock(&adev->mes); if (r) { dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r); @@ -251,10 +272,10 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev, return r; } -static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, - struct drm_amdgpu_userq_in *args_in, - struct amdgpu_usermode_queue *queue) +static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, + struct drm_amdgpu_userq_in *args_in) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; struct drm_amdgpu_userq_in *mqd_user = args_in; @@ -300,7 +321,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } - r = amdgpu_userq_input_va_validate(queue, compute_mqd->eop_va, + r = amdgpu_userq_input_va_validate(adev, queue, compute_mqd->eop_va, 2048); if (r) goto free_mqd; @@ -341,11 +362,11 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->tmz_queue = mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; - r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->shadow_va, + r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->shadow_va, shadow_info.shadow_size); if (r) goto free_mqd; - r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->csa_va, + r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->csa_va, shadow_info.csa_size); if (r) goto free_mqd; @@ -366,7 +387,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, r = -ENOMEM; goto free_mqd; } - r = amdgpu_userq_input_va_validate(queue, mqd_sdma_v11->csa_va, + r = amdgpu_userq_input_va_validate(adev, queue, mqd_sdma_v11->csa_va, 32); if (r) goto free_mqd; @@ -391,7 +412,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, } /* FW expects WPTR BOs to be mapped into GART */ - r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); + r = mes_userq_create_wptr_mapping(adev, uq_mgr, queue, userq_props->wptr_gpu_addr); if (r) { DRM_ERROR("Failed to create WPTR mapping\n"); goto free_ctx; @@ -411,18 +432,18 @@ free_props: return r; } -static void -mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static void mes_userq_mqd_destroy(struct amdgpu_usermode_queue *queue) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; + amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); } -static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static int mes_userq_preempt(struct amdgpu_usermode_queue *queue) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; struct mes_suspend_gang_input queue_input; struct amdgpu_userq_obj *ctx = &queue->fw_obj; @@ -466,9 +487,9 @@ out: return r; } -static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static int mes_userq_restore(struct amdgpu_usermode_queue *queue) { + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; struct mes_resume_gang_input queue_input; struct amdgpu_userq_obj *ctx = &queue->fw_obj; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 3a52754b5cad..81bf9385d55a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -56,11 +56,13 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_4_mes_2.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_4_mes1.bin"); static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block); static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block); -static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); -static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); +static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); +static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); #define MES_EOP_SIZE 2048 #define GFX_MES_DRAM_SIZE 0x80000 @@ -660,7 +662,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, break; default: - DRM_ERROR("unsupported misc op (%d) \n", input->op); + drm_err(adev_to_drm(mes->adev), "unsupported misc op (%d)\n", input->op); return -EINVAL; } @@ -811,7 +813,7 @@ static int mes_v11_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes, mes_reset_queue_pkt.queue_type = convert_to_mes_queue_type(input->queue_type); mes_reset_queue_pkt.doorbell_offset_addr = - mes->hung_queue_db_array_gpu_addr; + mes->hung_queue_db_array_gpu_addr[0]; if (input->detect_only) mes_reset_queue_pkt.hang_detect_only = 1; @@ -1570,7 +1572,7 @@ static void mes_v11_0_kiq_clear(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); } -static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) +static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id) { int r = 0; struct amdgpu_ip_block *ip_block; @@ -1625,7 +1627,7 @@ failure: return r; } -static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) +static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id) { if (adev->mes.ring[0].sched.ready) { mes_v11_0_kiq_dequeue(&adev->mes.ring[0]); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 744e95d3984a..b1c864dc79a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -42,8 +42,8 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_1_uni_mes.bin"); static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block); static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block); -static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev); -static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev); +static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); +static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); #define MES_EOP_SIZE 2048 @@ -699,7 +699,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, break; default: - DRM_ERROR("unsupported misc op (%d) \n", input->op); + DRM_ERROR("unsupported misc op (%d)\n", input->op); return -EINVAL; } @@ -939,7 +939,7 @@ static int mes_v12_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes, mes_reset_queue_pkt.queue_type = convert_to_mes_queue_type(input->queue_type); mes_reset_queue_pkt.doorbell_offset_addr = - mes->hung_queue_db_array_gpu_addr; + mes->hung_queue_db_array_gpu_addr[0]; if (input->detect_only) mes_reset_queue_pkt.hang_detect_only = 1; @@ -1489,7 +1489,7 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_SCHED_PIPE) { if (adev->enable_uni_mes) - r = amdgpu_mes_map_legacy_queue(adev, ring); + r = amdgpu_mes_map_legacy_queue(adev, ring, 0); else r = mes_v12_0_kiq_enable_queue(adev); if (r) @@ -1739,7 +1739,7 @@ static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } -static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) +static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id) { int r = 0; struct amdgpu_ip_block *ip_block; @@ -1801,13 +1801,13 @@ failure: return r; } -static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) +static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id) { if (adev->mes.ring[0].sched.ready) { if (adev->enable_uni_mes) amdgpu_mes_unmap_legacy_queue(adev, &adev->mes.ring[AMDGPU_MES_SCHED_PIPE], - RESET_QUEUES, 0, 0); + RESET_QUEUES, 0, 0, 0); else mes_v12_0_kiq_dequeue_sched(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c new file mode 100644 index 000000000000..7b8c670d0a9e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c @@ -0,0 +1,1968 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include "amdgpu.h" +#include "soc15_common.h" +#include "soc_v1_0.h" +#include "gc/gc_12_1_0_offset.h" +#include "gc/gc_12_1_0_sh_mask.h" +#include "gc/gc_11_0_0_default.h" +#include "v12_structs.h" +#include "mes_v12_api_def.h" + +MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin"); + +static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block); +static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id); +static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block); +static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); +static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); + +#define MES_EOP_SIZE 2048 + +#define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000 +#define XCC_MID_MASK 0x41000000 + +static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); + } +} + +static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring) +{ + return *ring->rptr_cpu_addr; +} + +static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring) +{ + u64 wptr; + + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); + else + BUG(); + return wptr; +} + +static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = { + .type = AMDGPU_RING_TYPE_MES, + .align_mask = 1, + .nop = 0, + .support_64bit_ptrs = true, + .get_rptr = mes_v12_1_ring_get_rptr, + .get_wptr = mes_v12_1_ring_get_wptr, + .set_wptr = mes_v12_1_ring_set_wptr, + .insert_nop = amdgpu_ring_insert_nop, +}; + +static const char *mes_v12_1_opcodes[] = { + "SET_HW_RSRC", + "SET_SCHEDULING_CONFIG", + "ADD_QUEUE", + "REMOVE_QUEUE", + "PERFORM_YIELD", + "SET_GANG_PRIORITY_LEVEL", + "SUSPEND", + "RESUME", + "RESET", + "SET_LOG_BUFFER", + "CHANGE_GANG_PRORITY", + "QUERY_SCHEDULER_STATUS", + "unused", + "SET_DEBUG_VMID", + "MISC", + "UPDATE_ROOT_PAGE_TABLE", + "AMD_LOG", + "SET_SE_MODE", + "SET_GANG_SUBMIT", + "SET_HW_RSRC_1", + "INVALIDATE_TLBS", +}; + +static const char *mes_v12_1_misc_opcodes[] = { + "WRITE_REG", + "INV_GART", + "QUERY_STATUS", + "READ_REG", + "WAIT_REG_MEM", + "SET_SHADER_DEBUGGER", + "NOTIFY_WORK_ON_UNMAPPED_QUEUE", + "NOTIFY_TO_UNMAP_PROCESSES", +}; + +static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes)) + op_str = mes_v12_1_opcodes[x_pkt->header.opcode]; + + return op_str; +} + +static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if ((x_pkt->header.opcode == MES_SCH_API_MISC) && + (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes))) + op_str = mes_v12_1_misc_opcodes[x_pkt->opcode]; + + return op_str; +} + +static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, + int xcc_id, int pipe, void *pkt, + int size, int api_status_off) +{ + union MESAPI__QUERY_MES_STATUS mes_status_pkt; + signed long timeout = 2100000; /* 2100 ms */ + struct amdgpu_device *adev = mes->adev; + struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)]; + spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)]; + struct MES_API_STATUS *api_status; + union MESAPI__MISC *x_pkt = pkt; + const char *op_str, *misc_op_str; + unsigned long flags; + u64 status_gpu_addr; + u32 seq, status_offset; + u64 *status_ptr; + signed long r; + int ret; + + if (x_pkt->header.opcode >= MES_SCH_API_MAX) + return -EINVAL; + + if (amdgpu_emu_mode) { + timeout *= 1000; + } else if (amdgpu_sriov_vf(adev)) { + /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ + timeout = 15 * 600 * 1000; + } + + ret = amdgpu_device_wb_get(adev, &status_offset); + if (ret) + return ret; + + status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); + status_ptr = (u64 *)&adev->wb.wb[status_offset]; + *status_ptr = 0; + + spin_lock_irqsave(ring_lock, flags); + r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); + if (r) + goto error_unlock_free; + + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); + api_status->api_completion_fence_addr = status_gpu_addr; + api_status->api_completion_fence_value = 1; + + amdgpu_ring_write_multiple(ring, pkt, size / 4); + + memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); + mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; + mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_status_pkt.api_status.api_completion_fence_addr = + ring->fence_drv.gpu_addr; + mes_status_pkt.api_status.api_completion_fence_value = seq; + + amdgpu_ring_write_multiple(ring, &mes_status_pkt, + sizeof(mes_status_pkt) / 4); + + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(ring_lock, flags); + + op_str = mes_v12_1_get_op_string(x_pkt); + misc_op_str = mes_v12_1_get_misc_op_string(x_pkt); + + if (misc_op_str) + dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n", + xcc_id, pipe, op_str, misc_op_str); + else if (op_str) + dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n", + xcc_id, pipe, op_str); + else + dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n", + xcc_id, pipe, x_pkt->header.opcode); + + r = amdgpu_fence_wait_polling(ring, seq, timeout); + if (r < 1 || !*status_ptr) { + if (misc_op_str) + dev_err(adev->dev, + "MES(%d, %d) failed to respond to msg=%s (%s)\n", + xcc_id, pipe, op_str, misc_op_str); + else if (op_str) + dev_err(adev->dev, + "MES(%d, %d) failed to respond to msg=%s\n", + xcc_id, pipe, op_str); + else + dev_err(adev->dev, + "MES(%d, %d) failed to respond to msg=%d\n", + xcc_id, pipe, x_pkt->header.opcode); + + while (halt_if_hws_hang) + schedule(); + + r = -ETIMEDOUT; + goto error_wb_free; + } + + amdgpu_device_wb_free(adev, status_offset); + return 0; + +error_undo: + dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe); + amdgpu_ring_undo(ring); + +error_unlock_free: + spin_unlock_irqrestore(ring_lock, flags); + +error_wb_free: + amdgpu_device_wb_free(adev, status_offset); + return r; +} + +static int convert_to_mes_queue_type(int queue_type) +{ + if (queue_type == AMDGPU_RING_TYPE_GFX) + return MES_QUEUE_TYPE_GFX; + else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) + return MES_QUEUE_TYPE_COMPUTE; + else if (queue_type == AMDGPU_RING_TYPE_SDMA) + return MES_QUEUE_TYPE_SDMA; + else if (queue_type == AMDGPU_RING_TYPE_MES) + return MES_QUEUE_TYPE_SCHQ; + else + BUG(); + return -1; +} + +static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes, + struct mes_add_queue_input *input) +{ + union MESAPI__ADD_QUEUE mes_add_queue_pkt; + int xcc_id = input->xcc_id; + int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); + + if (mes->enable_coop_mode) + xcc_id = mes->master_xcc_ids[inst]; + + memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); + + mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; + mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_add_queue_pkt.process_id = input->process_id; + mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; + mes_add_queue_pkt.process_va_start = input->process_va_start; + mes_add_queue_pkt.process_va_end = input->process_va_end; + mes_add_queue_pkt.process_quantum = input->process_quantum; + mes_add_queue_pkt.process_context_addr = input->process_context_addr; + mes_add_queue_pkt.gang_quantum = input->gang_quantum; + mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; + mes_add_queue_pkt.inprocess_gang_priority = + input->inprocess_gang_priority; + mes_add_queue_pkt.gang_global_priority_level = + input->gang_global_priority_level; + mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_add_queue_pkt.mqd_addr = input->mqd_addr; + + mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; + + mes_add_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_add_queue_pkt.paging = input->paging; + mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl; + mes_add_queue_pkt.gws_base = input->gws_base; + mes_add_queue_pkt.gws_size = input->gws_size; + mes_add_queue_pkt.trap_handler_addr = input->tba_addr; + mes_add_queue_pkt.tma_addr = input->tma_addr; + mes_add_queue_pkt.trap_en = input->trap_en; + mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear; + mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; + + /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ + mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; + mes_add_queue_pkt.gds_size = input->queue_size; + + /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ + mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; + mes_add_queue_pkt.gds_size = input->queue_size; + + mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + xcc_id, AMDGPU_MES_SCHED_PIPE, + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); +} + +static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes, + struct mes_remove_queue_input *input) +{ + union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + int xcc_id = input->xcc_id; + int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); + + if (mes->enable_coop_mode) + xcc_id = mes->master_xcc_ids[inst]; + + memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); + + mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; + mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + xcc_id, AMDGPU_MES_SCHED_PIPE, + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + +static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + int pipe; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + /* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */ + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, pipe, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + +static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes, + struct mes_map_legacy_queue_input *input) +{ + union MESAPI__ADD_QUEUE mes_add_queue_pkt; + int pipe; + + memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); + + mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; + mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_add_queue_pkt.pipe_id = input->pipe_id; + mes_add_queue_pkt.queue_id = input->queue_id; + mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_add_queue_pkt.mqd_addr = input->mqd_addr; + mes_add_queue_pkt.wptr_addr = input->wptr_addr; + mes_add_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_add_queue_pkt.map_legacy_kq = 1; + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, pipe, + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); +} + +static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes, + struct mes_unmap_legacy_queue_input *input) +{ + union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + int pipe; + + memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); + + mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; + mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_remove_queue_pkt.gang_context_addr = 0; + + mes_remove_queue_pkt.pipe_id = input->pipe_id; + mes_remove_queue_pkt.queue_id = input->queue_id; + + if (input->action == PREEMPT_QUEUES_NO_UNMAP) { + mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; + mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; + mes_remove_queue_pkt.tf_data = + lower_32_bits(input->trail_fence_data); + } else { + mes_remove_queue_pkt.unmap_legacy_queue = 1; + mes_remove_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + } + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, pipe, + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + +static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes, + struct mes_suspend_gang_input *input) +{ + return 0; +} + +static int mes_v12_1_resume_gang(struct amdgpu_mes *mes, + struct mes_resume_gang_input *input) +{ + return 0; +} + +static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes, + int pipe, int xcc_id) +{ + union MESAPI__QUERY_MES_STATUS mes_status_pkt; + + memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); + + mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; + mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, + &mes_status_pkt, sizeof(mes_status_pkt), + offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); +} +static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset) +{ + /* Check xcc reg offset range */ + uint32_t xcc = (reg_offset & XCC_MID_MASK) ? 4 : 0; + /* Each XCC has two register ranges. + * These are represented in reg_offset[17:16] + */ + return ((reg_offset >> 16) & 0x3) + xcc; +} + +static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id, + struct RRMT_OPTION *rrmt_opt) +{ + uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg); + + if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) { + rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg); + rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ? + MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD; + } else { + rrmt_opt->mode = MES_RRMT_MODE_LOCAL_REMOTE_AID; + } +} + +static int mes_v12_1_misc_op(struct amdgpu_mes *mes, + struct mes_misc_op_input *input) +{ + struct amdgpu_device *adev = mes->adev; + union MESAPI__MISC misc_pkt; + int pipe; + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + memset(&misc_pkt, 0, sizeof(misc_pkt)); + + misc_pkt.header.type = MES_API_TYPE_SCHEDULER; + misc_pkt.header.opcode = MES_SCH_API_MISC; + misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + switch (input->op) { + case MES_MISC_OP_READ_REG: + misc_pkt.opcode = MESAPI_MISC__READ_REG; + misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset; + misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; + mes_v12_1_get_rrmt(input->read_reg.reg_offset, + GET_INST(GC, input->xcc_id), + &misc_pkt.read_reg.rrmt_opt); + if (misc_pkt.read_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) { + misc_pkt.read_reg.reg_offset = + soc_v1_0_normalize_xcc_reg_offset(misc_pkt.read_reg.reg_offset); + } + break; + case MES_MISC_OP_WRITE_REG: + misc_pkt.opcode = MESAPI_MISC__WRITE_REG; + misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset; + misc_pkt.write_reg.reg_value = input->write_reg.reg_value; + mes_v12_1_get_rrmt(input->write_reg.reg_offset, + GET_INST(GC, input->xcc_id), + &misc_pkt.write_reg.rrmt_opt); + if (misc_pkt.write_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) { + misc_pkt.write_reg.reg_offset = + soc_v1_0_normalize_xcc_reg_offset(misc_pkt.write_reg.reg_offset); + } + break; + case MES_MISC_OP_WRM_REG_WAIT: + misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; + misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; + misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; + misc_pkt.wait_reg_mem.reg_offset2 = 0; + mes_v12_1_get_rrmt(input->wrm_reg.reg0, + GET_INST(GC, input->xcc_id), + &misc_pkt.wait_reg_mem.rrmt_opt1); + if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) { + misc_pkt.wait_reg_mem.reg_offset1 = + soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1); + } + break; + case MES_MISC_OP_WRM_REG_WR_WAIT: + misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; + misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; + misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; + misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; + misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; + mes_v12_1_get_rrmt(input->wrm_reg.reg0, + GET_INST(GC, input->xcc_id), + &misc_pkt.wait_reg_mem.rrmt_opt1); + mes_v12_1_get_rrmt(input->wrm_reg.reg1, + GET_INST(GC, input->xcc_id), + &misc_pkt.wait_reg_mem.rrmt_opt2); + + if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) { + misc_pkt.wait_reg_mem.reg_offset1 = + soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1); + } + if (misc_pkt.wait_reg_mem.rrmt_opt2.mode != MES_RRMT_MODE_REMOTE_MID) { + misc_pkt.wait_reg_mem.reg_offset2 = + soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset2); + } + break; + case MES_MISC_OP_SET_SHADER_DEBUGGER: + pipe = AMDGPU_MES_SCHED_PIPE; + misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; + misc_pkt.set_shader_debugger.process_context_addr = + input->set_shader_debugger.process_context_addr; + misc_pkt.set_shader_debugger.flags.u32all = + input->set_shader_debugger.flags.u32all; + misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl = + input->set_shader_debugger.spi_gdbg_per_vmid_cntl; + memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl, + input->set_shader_debugger.tcp_watch_cntl, + sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); + misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; + break; + case MES_MISC_OP_CHANGE_CONFIG: + misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; + misc_pkt.change_config.opcode = + MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; + misc_pkt.change_config.option.bits.limit_single_process = + input->change_config.option.limit_single_process; + break; + default: + DRM_ERROR("unsupported misc op (%d) \n", input->op); + return -EINVAL; + } + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, pipe, + &misc_pkt, sizeof(misc_pkt), + offsetof(union MESAPI__MISC, api_status)); +} + +static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes, + int pipe, int xcc_id) +{ + union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; + int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe); + + memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt)); + + mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; + mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; + + if (mes->enable_coop_mode && pipe == AMDGPU_MES_SCHED_PIPE) { + master_xcc_id = mes->master_xcc_ids[inst]; + mes_set_hw_res_1_pkt.mes_coop_mode = 1; + mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr = + mes->shared_cmd_buf_gpu_addr[master_xcc_id]; + } + + return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, + &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); +} + +static void mes_v12_1_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt) +{ + /* + * GFX V12 has only one GFX pipe, but 8 queues in it. + * GFX pipe 0 queue 0 is being used by Kernel queue. + * Set GFX pipe 0 queue 1-7 for MES scheduling + * mask = 1111 1110b + */ + pkt->gfx_hqd_mask[0] = 0xFE; +} + +static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes, + int pipe, int xcc_id) +{ + int i; + struct amdgpu_device *adev = mes->adev; + union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; + + memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); + + mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; + mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + if (pipe == AMDGPU_MES_SCHED_PIPE) { + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + mes_v12_1_set_gfx_hqd_mask(&mes_set_hw_res_pkt); + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = + mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + } + + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = + mes->sch_ctx_gpu_addr[pipe]; + mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = + mes->query_status_fence_gpu_addr[pipe]; + + for (i = 0; i < 5; i++) { + mes_set_hw_res_pkt.gc_base[i] = + adev->reg_offset[GC_HWIP][0][i]; + mes_set_hw_res_pkt.mmhub_base[i] = + adev->reg_offset[MMHUB_HWIP][0][i]; + mes_set_hw_res_pkt.osssys_base[i] = + adev->reg_offset[OSSSYS_HWIP][0][i]; + } + + mes_set_hw_res_pkt.disable_reset = 1; + mes_set_hw_res_pkt.disable_mes_log = 1; + mes_set_hw_res_pkt.use_different_vmid_compute = 1; + mes_set_hw_res_pkt.enable_reg_active_poll = 1; + mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; + + /* + * Keep oversubscribe timer for sdma . When we have unmapped doorbell + * handling support, other queue will not use the oversubscribe timer. + * handling mode - 0: disabled; 1: basic version; 2: basic+ version + */ + mes_set_hw_res_pkt.oversubscription_timer = 50; + mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; + + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = + mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE; + } + + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) + mes_set_hw_res_pkt.limit_single_process = 1; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); +} + +static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes, + int xcc_id) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t data; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1); + data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << + CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data); + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2); + data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << + CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data); + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3); + data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << + CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data); + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4); + data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << + CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data); + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5); + data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << + CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data); + + data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data); +} + + +static void mes_v12_1_enable_unmapped_doorbell_handling( + struct amdgpu_mes *mes, bool enable, int xcc_id) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL); + + /* + * The default PROC_LSB settng is 0xc which means doorbell + * addr[16:12] gives the doorbell page number. For kfd, each + * process will use 2 pages of doorbell, we need to change the + * setting to 0xd + */ + data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK; + data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT; + + data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT; + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data); +} + +#if 0 +static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + int pipe; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + + if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + mes_reset_queue_pkt.reset_legacy_gfx = 1; + mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; + mes_reset_queue_pkt.queue_id_lp = input->queue_id; + mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; + mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; + mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; + mes_reset_queue_pkt.vmid_id_lp = input->vmid; + } else { + mes_reset_queue_pkt.reset_queue_only = 1; + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + } + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, pipe, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} +#endif + +static int mes_v12_inv_tlb_convert_hub_id(uint8_t id) +{ + /* + * MES doesn't support invalidate gc_hub on slave xcc individually + * master xcc will invalidate all gc_hub for the partition + */ + if (AMDGPU_IS_GFXHUB(id)) + return 0; + else if (AMDGPU_IS_MMHUB0(id)) + return 1; + else if (AMDGPU_IS_MMHUB1(id)) + return 2; + return -EINVAL; + +} + +static int mes_v12_1_inv_tlbs_pasid(struct amdgpu_mes *mes, + struct mes_inv_tlbs_pasid_input *input) +{ + union MESAPI__INV_TLBS mes_inv_tlbs; + int xcc_id = input->xcc_id; + int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); + int ret; + + if (mes->enable_coop_mode) + xcc_id = mes->master_xcc_ids[inst]; + + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs)); + + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER; + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS; + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0; + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type; + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid; + + /*convert amdgpu_mes_hub_id to mes expected hub_id */ + ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id); + if (ret < 0) + return -EINVAL; + mes_inv_tlbs.invalidate_tlbs.hub_id = ret; + return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, AMDGPU_MES_KIQ_PIPE, + &mes_inv_tlbs, sizeof(mes_inv_tlbs), + offsetof(union MESAPI__INV_TLBS, api_status)); + +} + +static const struct amdgpu_mes_funcs mes_v12_1_funcs = { + .add_hw_queue = mes_v12_1_add_hw_queue, + .remove_hw_queue = mes_v12_1_remove_hw_queue, + .map_legacy_queue = mes_v12_1_map_legacy_queue, + .unmap_legacy_queue = mes_v12_1_unmap_legacy_queue, + .suspend_gang = mes_v12_1_suspend_gang, + .resume_gang = mes_v12_1_resume_gang, + .misc_op = mes_v12_1_misc_op, + .reset_hw_queue = mes_v12_1_reset_hw_queue, + .invalidate_tlbs_pasid = mes_v12_1_inv_tlbs_pasid, +}; + +static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + int xcc_id) +{ + int r, inst = MES_PIPE_INST(xcc_id, pipe); + const struct mes_firmware_header_v1_0 *mes_hdr; + const __le32 *fw_data; + unsigned fw_size; + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw[pipe]->data; + + fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + + le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.ucode_fw_obj[inst], + &adev->mes.ucode_fw_gpu_addr[inst], + (void **)&adev->mes.ucode_fw_ptr[inst]); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r); + return r; + } + + memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size); + + amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]); + amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]); + + return 0; +} + +static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + int xcc_id) +{ + int r, inst = MES_PIPE_INST(xcc_id, pipe); + const struct mes_firmware_header_v1_0 *mes_hdr; + const __le32 *fw_data; + unsigned fw_size; + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw[pipe]->data; + + fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + + le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + 64 * 1024, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.data_fw_obj[inst], + &adev->mes.data_fw_gpu_addr[inst], + (void **)&adev->mes.data_fw_ptr[inst]); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r); + return r; + } + + memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size); + + amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]); + amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]); + + return 0; +} + +static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + int xcc_id) +{ + int inst = MES_PIPE_INST(xcc_id, pipe); + + amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst], + &adev->mes.data_fw_gpu_addr[inst], + (void **)&adev->mes.data_fw_ptr[inst]); + + amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst], + &adev->mes.ucode_fw_gpu_addr[inst], + (void **)&adev->mes.ucode_fw_ptr[inst]); +} + +static void mes_v12_1_enable(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint64_t ucode_addr; + uint32_t pipe, data = 0; + + if (enable) { + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); + + mutex_lock(&adev->srbm_mutex); + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, + GET_INST(GC, xcc_id)); + + ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regCP_MES_PRGRM_CNTR_START, + lower_32_bits(ucode_addr)); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regCP_MES_PRGRM_CNTR_START_HI, + upper_32_bits(ucode_addr)); + } + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + + /* unhalt MES and activate pipe0 */ + data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); + + if (amdgpu_emu_mode) + msleep(500); + else if (adev->enable_uni_mes) + udelay(500); + else + udelay(50); + } else { + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, + MES_INVALIDATE_ICACHE, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); + } +} + +static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev, + int xcc_id) +{ + uint64_t ucode_addr; + int pipe; + + mes_v12_1_enable(adev, false, xcc_id); + + mutex_lock(&adev->srbm_mutex); + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + /* me=3, queue=0 */ + soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); + + /* set ucode start address */ + ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START, + lower_32_bits(ucode_addr)); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI, + upper_32_bits(ucode_addr)); + + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + } + mutex_unlock(&adev->srbm_mutex); +} + +/* This function is for backdoor MES firmware */ +static int mes_v12_1_load_microcode(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + bool prime_icache, int xcc_id) +{ + int r, inst = MES_PIPE_INST(xcc_id, pipe); + uint32_t data; + + mes_v12_1_enable(adev, false, xcc_id); + + if (!adev->mes.fw[pipe]) + return -EINVAL; + + r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id); + if (r) + return r; + + r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id); + if (r) { + mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id); + return r; + } + + mutex_lock(&adev->srbm_mutex); + /* me=3, pipe=0, queue=0 */ + soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0); + + /* set ucode fimrware address */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO, + lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI, + upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); + + /* set ucode instruction cache boundary to 2M-1 */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF); + + /* set ucode data firmware address */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO, + lower_32_bits(adev->mes.data_fw_gpu_addr[inst])); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI, + upper_32_bits(adev->mes.data_fw_gpu_addr[inst])); + + /* Set data cache boundary CP_MES_MDBOUND_LO */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF); + + if (prime_icache) { + /* invalidate ICACHE */ + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); + + /* prime the ICACHE. */ + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); + } + + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + + return 0; +} + +static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + int xcc_id) +{ + int r, inst = MES_PIPE_INST(xcc_id, pipe); + u32 *eop; + + r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.eop_gpu_obj[inst], + &adev->mes.eop_gpu_addr[inst], + (void **)&eop); + if (r) { + dev_warn(adev->dev, "(%d) create EOP bo failed\n", r); + return r; + } + + memset(eop, 0, + adev->mes.eop_gpu_obj[inst]->tbo.base.size); + + amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]); + amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]); + + return 0; +} + +static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + int xcc_id) +{ + int r, inst = MES_PIPE_INST(xcc_id, pipe); + + if (pipe == AMDGPU_MES_KIQ_PIPE) + return 0; + + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.shared_cmd_buf_obj[inst], + &adev->mes.shared_cmd_buf_gpu_addr[inst], + NULL); + if (r) { + dev_err(adev->dev, + "(%d) failed to create shared cmd buf bo\n", r); + return r; + } + + return 0; +} + +static int mes_v12_1_mqd_init(struct amdgpu_ring *ring) +{ + struct v12_1_mes_mqd *mqd = ring->mqd_ptr; + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; + uint32_t tmp; + + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000007; + + eop_base_addr = ring->eop_gpu_addr >> 8; + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = regCP_HQD_EOP_CONTROL_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(MES_EOP_SIZE / 4) - 1)); + + mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr); + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); + mqd->cp_hqd_eop_control = tmp; + + /* disable the queue if it's active */ + ring->wptr = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); + + /* set MQD vmid to 0 */ + tmp = regCP_MQD_CONTROL_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr); + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = ring->rptr_gpu_addr; + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = ring->wptr_gpu_addr; + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = regCP_HQD_PQ_CONTROL_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(ring->ring_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1); + mqd->cp_hqd_pq_control = tmp; + + /* enable doorbell */ + tmp = 0; + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } else { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + mqd->cp_hqd_pq_doorbell_control = tmp; + + mqd->cp_hqd_vmid = 0; + /* activate the queue */ + mqd->cp_hqd_active = 1; + + tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, + PRELOAD_SIZE, 0x63); + mqd->cp_hqd_persistent_state = tmp; + + mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT; + mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT; + mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT; + + /* + * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped + * doorbell handling. This is a reserved CP internal register can + * not be accesss by others + */ + mqd->cp_hqd_gfx_control = BIT(15); + + return 0; +} + +static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring, + int xcc_id) +{ + struct v12_1_mes_mqd *mqd = ring->mqd_ptr; + struct amdgpu_device *adev = ring->adev; + uint32_t data = 0; + + mutex_lock(&adev->srbm_mutex); + soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id)); + + /* set CP_HQD_VMID.VMID = 0. */ + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID); + data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data); + + /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */ + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); + + /* set CP_MQD_BASE_ADDR/HI with the MQD base address */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); + + /* set CP_MQD_CONTROL.VMID=0 */ + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL); + data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0); + + /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); + + /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR, + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* set CP_HQD_PQ_CONTROL */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); + + /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, + mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, + mqd->cp_hqd_pq_wptr_poll_addr_hi); + + /* set CP_HQD_PQ_DOORBELL_CONTROL */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); + + /* set CP_HQD_ACTIVE.ACTIVE=1 */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active); + + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); +} + +static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring; + int r, inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); + + if (!kiq->pmf || !kiq->pmf->kiq_map_queues) + return -EINVAL; + + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + return r; + } + + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[inst]); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + DRM_ERROR("kfq enable failed\n"); + kiq_ring->sched.ready = false; + } + return r; +} + +static int mes_v12_1_queue_init(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + int xcc_id) +{ + struct amdgpu_ring *ring; + int r; + + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + ring = &adev->gfx.kiq[xcc_id].ring; + else + ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)]; + + if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && + (amdgpu_in_reset(adev) || adev->in_suspend)) { + *(ring->wptr_cpu_addr) = 0; + *(ring->rptr_cpu_addr) = 0; + amdgpu_ring_clear_ring(ring); + } + + r = mes_v12_1_mqd_init(ring); + if (r) + return r; + + if (pipe == AMDGPU_MES_SCHED_PIPE) { + if (adev->enable_uni_mes) + r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id); + else + r = mes_v12_1_kiq_enable_queue(adev, xcc_id); + if (r) + return r; + } else { + mes_v12_1_queue_init_register(ring, xcc_id); + } + + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); + + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); + + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + + return 0; +} + +static int mes_v12_1_ring_init(struct amdgpu_device *adev, + int xcc_id, int pipe) +{ + struct amdgpu_ring *ring; + int inst = MES_PIPE_INST(xcc_id, pipe); + + ring = &adev->mes.ring[inst]; + + ring->funcs = &mes_v12_1_ring_funcs; + + ring->me = 3; + ring->pipe = pipe; + ring->queue = 0; + ring->xcc_id = xcc_id; + ring->vm_hub = AMDGPU_GFXHUB(xcc_id); + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; + ring->no_scheduler = true; + snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu", + (unsigned char)xcc_id, (unsigned char)ring->me, + (unsigned char)ring->pipe, (unsigned char)ring->queue); + + if (pipe == AMDGPU_MES_SCHED_PIPE) + ring->doorbell_index = + (adev->doorbell_index.mes_ring0 + + xcc_id * adev->doorbell_index.xcc_doorbell_range) + << 1; + else + ring->doorbell_index = + (adev->doorbell_index.mes_ring1 + + xcc_id * adev->doorbell_index.xcc_doorbell_range) + << 1; + + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); +} + +static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id) +{ + struct amdgpu_ring *ring; + int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); + + spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock); + + ring = &adev->gfx.kiq[xcc_id].ring; + + ring->me = 3; + ring->pipe = 1; + ring->queue = 0; + ring->xcc_id = xcc_id; + ring->vm_hub = AMDGPU_GFXHUB(xcc_id); + + ring->adev = NULL; + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; + ring->no_scheduler = true; + ring->doorbell_index = + (adev->doorbell_index.mes_ring1 + + xcc_id * adev->doorbell_index.xcc_doorbell_range) + << 1; + + snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu", + (unsigned char)xcc_id, (unsigned char)ring->me, + (unsigned char)ring->pipe, (unsigned char)ring->queue); + + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); +} + +static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + int xcc_id) +{ + int r, mqd_size = sizeof(struct v12_1_mes_mqd); + struct amdgpu_ring *ring; + int inst = MES_PIPE_INST(xcc_id, pipe); + + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + ring = &adev->gfx.kiq[xcc_id].ring; + else + ring = &adev->mes.ring[inst]; + + if (ring->mqd_obj) + return 0; + + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); + return r; + } + + memset(ring->mqd_ptr, 0, mqd_size); + + /* prepare MQD backup */ + adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->mes.mqd_backup[inst]) + dev_warn(adev->dev, + "no memory to create MQD backup for ring %s\n", + ring->name); + + return 0; +} + +static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + adev->mes.funcs = &mes_v12_1_funcs; + adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init; + adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini; + adev->mes.enable_legacy_queue_map = true; + + adev->mes.event_log_size = + adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) : AMDGPU_MES_LOG_BUFFER_SIZE; + + r = amdgpu_mes_init(adev); + if (r) + return r; + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id); + if (r) + return r; + + r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id); + if (r) + return r; + + if (!adev->enable_uni_mes && pipe == + AMDGPU_MES_KIQ_PIPE) + r = mes_v12_1_kiq_ring_init(adev, xcc_id); + else + r = mes_v12_1_ring_init(adev, xcc_id, pipe); + if (r) + return r; + + if (adev->enable_uni_mes && num_xcc > 1) { + r = mes_v12_1_allocate_shared_cmd_buf(adev, + pipe, xcc_id); + if (r) + return r; + } + } + } + + return 0; +} + +static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + inst = MES_PIPE_INST(xcc_id, pipe); + + amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst], + &adev->mes.shared_cmd_buf_gpu_addr[inst], + NULL); + + kfree(adev->mes.mqd_backup[inst]); + + amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst], + &adev->mes.eop_gpu_addr[inst], + NULL); + amdgpu_ucode_release(&adev->mes.fw[inst]); + + if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { + amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj, + &adev->mes.ring[inst].mqd_gpu_addr, + &adev->mes.ring[inst].mqd_ptr); + amdgpu_ring_fini(&adev->mes.ring[inst]); + } + } + } + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + if (!adev->enable_uni_mes) { + amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj, + &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr, + &adev->gfx.kiq[xcc_id].ring.mqd_ptr); + amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring); + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + mes_v12_1_free_ucode_buffers(adev, + AMDGPU_MES_KIQ_PIPE, xcc_id); + mes_v12_1_free_ucode_buffers(adev, + AMDGPU_MES_SCHED_PIPE, xcc_id); + } + } + + amdgpu_mes_fini(adev); + return 0; +} + +static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t data; + int i; + + mutex_lock(&adev->srbm_mutex); + soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0, + GET_INST(GC, xcc_id)); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + } + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0); + + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + + adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false; +} + +static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id) +{ + uint32_t tmp; + struct amdgpu_device *adev = ring->adev; + + /* tell RLC which is KIQ queue */ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); + tmp |= 0x80; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); +} + +static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id) +{ + int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); + int r = 0; + struct amdgpu_ip_block *ip_block; + + if (adev->enable_uni_mes) + mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id); + else + mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + + r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, + false, xcc_id); + if (r) { + DRM_ERROR("failed to load MES fw, r=%d\n", r); + return r; + } + + r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, + true, xcc_id); + if (r) { + DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); + return r; + } + + mes_v12_1_set_ucode_start_addr(adev, xcc_id); + + } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) + mes_v12_1_set_ucode_start_addr(adev, xcc_id); + + mes_v12_1_enable(adev, true, xcc_id); + + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES); + if (unlikely(!ip_block)) { + dev_err(adev->dev, "Failed to get MES handle\n"); + return -EINVAL; + } + + r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id); + if (r) + goto failure; + + if (adev->enable_uni_mes) { + r = mes_v12_1_set_hw_resources(&adev->mes, + AMDGPU_MES_KIQ_PIPE, xcc_id); + if (r) + goto failure; + + mes_v12_1_set_hw_resources_1(&adev->mes, + AMDGPU_MES_KIQ_PIPE, xcc_id); + } + + if (adev->mes.enable_legacy_queue_map) { + r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); + if (r) + goto failure; + } + + return r; + +failure: + mes_v12_1_hw_fini(ip_block); + return r; +} + +static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id) +{ + int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); + + if (adev->mes.ring[inst].sched.ready) { + if (adev->enable_uni_mes) + amdgpu_mes_unmap_legacy_queue(adev, + &adev->mes.ring[inst], + RESET_QUEUES, 0, 0, xcc_id); + else + mes_v12_1_kiq_dequeue_sched(adev, xcc_id); + + adev->mes.ring[inst].sched.ready = false; + } + + mes_v12_1_enable(adev, false, xcc_id); + + return 0; +} + +static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id) +{ + u32 num_xcc_per_xcp, num_xcc = NUM_XCC(adev->gfx.xcc_mask); + int r = 0; + + if (num_xcc == 1) + return r; + + if (adev->gfx.funcs && + adev->gfx.funcs->get_xccs_per_xcp) + num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev); + else + return -EINVAL; + + switch (adev->xcp_mgr->mode) { + case AMDGPU_SPX_PARTITION_MODE: + adev->mes.enable_coop_mode = 1; + adev->mes.master_xcc_ids[xcc_id] = 0; + break; + case AMDGPU_DPX_PARTITION_MODE: + adev->mes.enable_coop_mode = 1; + adev->mes.master_xcc_ids[xcc_id] = + (xcc_id/num_xcc_per_xcp) * (num_xcc / 2); + break; + case AMDGPU_QPX_PARTITION_MODE: + adev->mes.enable_coop_mode = 1; + adev->mes.master_xcc_ids[xcc_id] = + (xcc_id/num_xcc_per_xcp) * (num_xcc / 4); + break; + case AMDGPU_CPX_PARTITION_MODE: + adev->mes.enable_coop_mode = 0; + break; + default: + r = -EINVAL; + break; + } + return r; +} + +static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id) +{ + int r; + struct amdgpu_device *adev = ip_block->adev; + + if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready) + goto out; + + if (!adev->enable_mes_kiq) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + r = mes_v12_1_load_microcode(adev, + AMDGPU_MES_SCHED_PIPE, true, xcc_id); + if (r) { + DRM_ERROR("failed to MES fw, r=%d\n", r); + return r; + } + + mes_v12_1_set_ucode_start_addr(adev, xcc_id); + + } else if (adev->firmware.load_type == + AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + + mes_v12_1_set_ucode_start_addr(adev, xcc_id); + } + + mes_v12_1_enable(adev, true, xcc_id); + } + + /* Enable the MES to handle doorbell ring on unmapped queue */ + mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id); + + r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id); + if (r) + goto failure; + + r = mes_v12_1_set_hw_resources(&adev->mes, + AMDGPU_MES_SCHED_PIPE, xcc_id); + if (r) + goto failure; + + if (adev->enable_uni_mes) { + r = mes_v12_1_setup_coop_mode(adev, xcc_id); + if (r) + goto failure; + mes_v12_1_set_hw_resources_1(&adev->mes, + AMDGPU_MES_SCHED_PIPE, xcc_id); + } + mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id); + + r = mes_v12_1_query_sched_status(&adev->mes, + AMDGPU_MES_SCHED_PIPE, xcc_id); + if (r) { + DRM_ERROR("MES is busy\n"); + goto failure; + } + +out: + /* + * Disable KIQ ring usage from the driver once MES is enabled. + * MES uses KIQ ring exclusively so driver cannot access KIQ ring + * with MES enabled. + */ + adev->gfx.kiq[xcc_id].ring.sched.ready = false; + adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true; + + return 0; + +failure: + mes_v12_1_hw_fini(ip_block); + return r; +} + +static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); + if (r) + return r; + } + + return 0; +} + +static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block) +{ + return 0; +} + +static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block) +{ + int r; + + r = amdgpu_mes_suspend(ip_block->adev); + if (r) + return r; + + return mes_v12_1_hw_fini(ip_block); +} + +static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block) +{ + int r; + + r = mes_v12_1_hw_init(ip_block); + if (r) + return r; + + return amdgpu_mes_resume(ip_block->adev); +} + +static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int pipe, r; + + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + r = amdgpu_mes_init_microcode(adev, pipe); + if (r) + return r; + } + + return 0; +} + +static const struct amd_ip_funcs mes_v12_1_ip_funcs = { + .name = "mes_v12_1", + .early_init = mes_v12_1_early_init, + .late_init = NULL, + .sw_init = mes_v12_1_sw_init, + .sw_fini = mes_v12_1_sw_fini, + .hw_init = mes_v12_1_hw_init, + .hw_fini = mes_v12_1_hw_fini, + .suspend = mes_v12_1_suspend, + .resume = mes_v12_1_resume, +}; + +const struct amdgpu_ip_block_version mes_v12_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_MES, + .major = 12, + .minor = 1, + .rev = 0, + .funcs = &mes_v12_1_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.h b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.h new file mode 100644 index 000000000000..cfc048f652b0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.h @@ -0,0 +1,29 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MES_V12_1_H__ +#define __MES_V12_1_H__ + +extern const struct amdgpu_ip_block_version mes_v12_1_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c index f6fc9778bc30..daf1f8ad4cca 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c @@ -163,6 +163,35 @@ static const char *mmhub_client_ids_v3_3_1[][2] = { [32+30][1] = "VCN1", }; +static const char *mmhub_client_ids_v3_4[][2] = { + [0][0] = "VMC", + [4][0] = "DCEDMC", + [5][0] = "MPXSP", + [6][0] = "MPASP", + [7][0] = "MP1", + [8][0] = "MPM", + [23][0] = "HDP", + [24][0] = "LSDMA", + [25][0] = "JPEG", + [26][0] = "VPE", + [27][0] = "VSCH", + [28][0] = "VCNU", + [30][0] = "VCNRD", + [3][1] = "DCEDWB", + [4][1] = "DCEDMC", + [5][1] = "MPXSP", + [6][1] = "MPASAP", + [7][1] = "MP1", + [8][1] = "MPM", + [21][1] = "OSSSYS", + [23][1] = "HDP", + [24][1] = "LSDMA", + [25][1] = "JPEG", + [26][1] = "VPE", + [27][1] = "VSCH", + [29][1] = "VCNWR", +}; + static uint32_t mmhub_v3_3_get_invalidate_req(unsigned int vmid, uint32_t flush_type) { @@ -211,6 +240,11 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev, mmhub_client_ids_v3_3_1[cid][rw] : cid == 0x140 ? "UMSCH" : NULL; break; + case IP_VERSION(3, 4, 0): + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_4) ? + mmhub_client_ids_v3_4[cid][rw] : + cid == 0x140 ? "UMSCH" : NULL; + break; default: mmhub_cid = NULL; break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c new file mode 100644 index 000000000000..7e917eb47a8c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c @@ -0,0 +1,916 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "mmhub_v4_2_0.h" + +#include "mmhub/mmhub_4_2_0_offset.h" +#include "mmhub/mmhub_4_2_0_sh_mask.h" + +#include "soc15_common.h" +#include "soc24_enum.h" + +#define regMMVM_L2_CNTL3_DEFAULT 0x80100007 +#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1 +#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0 + +static const char *mmhub_client_ids_v4_2_0[][2] = { + [0][0] = "VMC", + [4][0] = "DCEDMC", + [5][0] = "DCEVGA", + [6][0] = "MP0", + [7][0] = "MP1", + [8][0] = "MPIO", + [16][0] = "HDP", + [17][0] = "LSDMA", + [18][0] = "JPEG", + [19][0] = "VCNU0", + [21][0] = "VSCH", + [22][0] = "VCNU1", + [23][0] = "VCN1", + [32+20][0] = "VCN0", + [2][1] = "DBGUNBIO", + [3][1] = "DCEDWB", + [4][1] = "DCEDMC", + [5][1] = "DCEVGA", + [6][1] = "MP0", + [7][1] = "MP1", + [8][1] = "MPIO", + [10][1] = "DBGU0", + [11][1] = "DBGU1", + [12][1] = "DBGU2", + [13][1] = "DBGU3", + [14][1] = "XDP", + [15][1] = "OSSSYS", + [16][1] = "HDP", + [17][1] = "LSDMA", + [18][1] = "JPEG", + [19][1] = "VCNU0", + [20][1] = "VCN0", + [21][1] = "VSCH", + [22][1] = "VCNU1", + [23][1] = "VCN1", +}; + +static u64 mmhub_v4_2_0_get_fb_location(struct amdgpu_device *adev) +{ + u64 base; + + base = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), + regMMMC_VM_FB_LOCATION_BASE_LO32); + base &= MMMC_VM_FB_LOCATION_BASE_LO32__FB_BASE_LO32_MASK; + base <<= 24; + + base |= ((u64)(MMMC_VM_FB_LOCATION_BASE_HI32__FB_BASE_HI1_MASK & + RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), + regMMMC_VM_FB_LOCATION_BASE_HI32)) << 56); + + return base; +} + +static u64 mmhub_v4_2_0_get_mc_fb_offset(struct amdgpu_device *adev) +{ + return (u64)RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), + regMMMC_VM_FB_OFFSET) << 24; +} + +static void mmhub_v4_2_0_mid_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, + uint64_t page_table_base, + uint32_t mid_mask) +{ + struct amdgpu_vmhub *hub; + int i; + + for_each_inst(i, mid_mask) { + hub = &adev->vmhub[AMDGPU_MMHUB0(i)]; + WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); + } +} + +static void mmhub_v4_2_0_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, + uint64_t page_table_base) +{ + uint32_t mid_mask; + + mid_mask = adev->aid_mask; + mmhub_v4_2_0_mid_setup_vm_pt_regs(adev, vmid, + page_table_base, + mid_mask); +} + +static void mmhub_v4_2_0_mid_init_gart_aperture_regs(struct amdgpu_device *adev, + uint32_t mid_mask) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + int i; + + if (adev->gmc.pdb0_bo) + pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo); + else + pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v4_2_0_mid_setup_vm_pt_regs(adev, 0, pt_base, mid_mask); + + for_each_inst(i, mid_mask) { + if (adev->gmc.pdb0_bo) { + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.fb_start >> 12)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.fb_start >> 44)); + + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.fb_end >> 12)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.fb_end >> 44)); + } else { + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); + } + } +} + +static void mmhub_v4_2_0_mid_init_system_aperture_regs(struct amdgpu_device *adev, + uint32_t mid_mask) +{ + uint64_t value; + uint32_t tmp; + int i; + + /* + * the new L1 policy will block SRIOV guest from writing + * these regs, and they will be programed at host. + * so skip programing these regs. + */ + if (amdgpu_sriov_vf(adev)) + return; + + for_each_inst(i, mid_mask) { + /* Program the AGP BAR */ + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BASE_LO32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BASE_HI32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BOT_LO32, + lower_32_bits(adev->gmc.agp_start >> 24)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BOT_HI32, + upper_32_bits(adev->gmc.agp_start >> 24)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_TOP_LO32, + lower_32_bits(adev->gmc.agp_end >> 24)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_TOP_HI32, + upper_32_bits(adev->gmc.agp_end >> 24)); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, + lower_32_bits(min(adev->gmc.fb_start, + adev->gmc.agp_start) >> 18)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, + upper_32_bits(min(adev->gmc.fb_start, + adev->gmc.agp_start) >> 18)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, + lower_32_bits(max(adev->gmc.fb_end, + adev->gmc.agp_end) >> 18)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, + upper_32_bits(max(adev->gmc.fb_end, + adev->gmc.agp_end) >> 18)); + + /* Set default page address. */ + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2, + ENABLE_RETRY_FAULT_INTERRUPT, 0x1); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp); + } + + /* In the case squeezing vram into GART aperture, we don't use + * FB aperture and AGP aperture. Disable them. + */ + if (adev->gmc.pdb0_bo) { + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_FB_LOCATION_TOP_LO32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_FB_LOCATION_TOP_HI32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_FB_LOCATION_BASE_LO32, 0xFFFFFFFF); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_FB_LOCATION_BASE_HI32, 1); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_TOP_LO32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_TOP_HI32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BOT_LO32, 0xFFFFFFFF); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BOT_HI32, 1); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, + 0x7F); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0); + } +} + +static void mmhub_v4_2_0_mid_init_tlb_regs(struct amdgpu_device *adev, + uint32_t mid_mask) +{ + uint32_t tmp; + int i; + + for_each_inst(i, mid_mask) { + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_MX_L1_TLB_CNTL, tmp); + } +} + +static void mmhub_v4_2_0_mid_init_cache_regs(struct amdgpu_device *adev, + uint32_t mid_mask) +{ + uint32_t tmp; + int i; + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + for_each_inst(i, mid_mask) { + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, + INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, + INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL2, tmp); + + tmp = regMMVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL3, tmp); + + tmp = regMMVM_L2_CNTL4_DEFAULT; + /* For AMD APP APUs setup WC memory */ + if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, 1); + } else { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + } + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL4, tmp); + + tmp = regMMVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, + L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL5, tmp); + } +} + +static void mmhub_v4_2_0_mid_enable_system_domain(struct amdgpu_device *adev, + uint32_t mid_mask) +{ + uint32_t tmp; + int i; + + for_each_inst(i, mid_mask) { + tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, + ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, + PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_CNTL, tmp); + } +} + +static void mmhub_v4_2_0_mid_disable_identity_aperture(struct amdgpu_device *adev, + uint32_t mid_mask) +{ + int i; + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + for_each_inst(i, mid_mask) { + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x00001FFF); + + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); + + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + 0); + } +} + +static void mmhub_v4_2_0_mid_setup_vmid_config(struct amdgpu_device *adev, + uint32_t mid_mask) +{ + struct amdgpu_vmhub *hub; + uint32_t tmp; + int i, j; + + for_each_inst(j, mid_mask) { + hub = &adev->vmhub[AMDGPU_MMHUB0(j)]; + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j), + regMMVM_CONTEXT1_CNTL, + i * hub->ctx_distance); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j), regMMVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j), regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j), regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j), regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j), regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } + } + + hub->vm_cntx_cntl = tmp; +} + +static void mmhub_v4_2_0_mid_program_invalidation(struct amdgpu_device *adev, + uint32_t mid_mask) +{ + struct amdgpu_vmhub *hub; + unsigned int i, j; + + for_each_inst(j, mid_mask) { + hub = &adev->vmhub[AMDGPU_MMHUB0(j)]; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j), + regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j), + regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x3fff); + } + } +} + +static int mmhub_v4_2_0_mid_gart_enable(struct amdgpu_device *adev, + uint32_t mid_mask) +{ + /* GART Enable. */ + mmhub_v4_2_0_mid_init_gart_aperture_regs(adev, mid_mask); + mmhub_v4_2_0_mid_init_system_aperture_regs(adev, mid_mask); + mmhub_v4_2_0_mid_init_tlb_regs(adev, mid_mask); + mmhub_v4_2_0_mid_init_cache_regs(adev, mid_mask); + + mmhub_v4_2_0_mid_enable_system_domain(adev, mid_mask); + mmhub_v4_2_0_mid_disable_identity_aperture(adev, mid_mask); + mmhub_v4_2_0_mid_setup_vmid_config(adev, mid_mask); + mmhub_v4_2_0_mid_program_invalidation(adev, mid_mask); + + return 0; +} +static int mmhub_v4_2_0_gart_enable(struct amdgpu_device *adev) +{ + uint32_t mid_mask; + + mid_mask = adev->aid_mask; + return mmhub_v4_2_0_mid_gart_enable(adev, mid_mask); +} + +static void mmhub_v4_2_0_mid_gart_disable(struct amdgpu_device *adev, + uint32_t mid_mask) +{ + struct amdgpu_vmhub *hub; + u32 tmp; + u32 i, j; + + for_each_inst(j, mid_mask) { + hub = &adev->vmhub[AMDGPU_MMHUB0(j)]; + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j), + regMMVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, j), + regMMMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, j), + regMMMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, j), regMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, j), regMMVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, j), regMMVM_L2_CNTL3, 0); + } +} + +static void mmhub_v4_2_0_gart_disable(struct amdgpu_device *adev) +{ + uint32_t mid_mask; + + mid_mask = adev->aid_mask; + mmhub_v4_2_0_mid_gart_disable(adev, mid_mask); +} + +static void +mmhub_v4_2_0_mid_set_fault_enable_default(struct amdgpu_device *adev, + bool value, uint32_t mid_mask) +{ + u32 tmp; + int i; + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + for_each_inst(i, mid_mask) { + tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_PROTECTION_FAULT_CNTL_LO32); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32, + CRASH_ON_NO_RETRY_FAULT, 1); + } + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_PROTECTION_FAULT_CNTL_LO32, tmp); + } +} + + +/** + * mmhub_v4_2_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void +mmhub_v4_2_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + uint32_t mid_mask; + + mid_mask = adev->aid_mask; + mmhub_v4_2_0_mid_set_fault_enable_default(adev, value, mid_mask); +} + +static uint32_t mmhub_v4_2_0_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + /* Only use legacy inv on mmhub side */ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE3, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +/*TODO: l2 protection fault status is increased to 64bits. + * some critical fields like FED are moved to STATUS_HI32 */ +static void +mmhub_v4_2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, + uint32_t status) +{ + uint32_t cid, rw; + const char *mmhub_cid = NULL; + + cid = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID); + rw = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, RW); + + dev_err(adev->dev, + "MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n", + status); + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { + case IP_VERSION(4, 2, 0): + mmhub_cid = mmhub_client_ids_v4_2_0[cid][rw]; + break; + default: + mmhub_cid = NULL; + break; + } + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + mmhub_cid ? mmhub_cid : "unknown", cid); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%x\n", rw); +} + + +static const struct amdgpu_vmhub_funcs mmhub_v4_2_0_vmhub_funcs = { + .print_l2_protection_fault_status = mmhub_v4_2_0_print_l2_protection_fault_status, + .get_invalidate_req = mmhub_v4_2_0_get_invalidate_req, +}; + +static void mmhub_v4_2_0_mid_init(struct amdgpu_device *adev, + uint32_t mid_mask) +{ + struct amdgpu_vmhub *hub; + int i; + + for_each_inst(i, mid_mask) { + hub = &adev->vmhub[AMDGPU_MMHUB0(i)]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), + regMMVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), + regMMVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), + regMMVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), + regMMVM_CONTEXT0_CNTL); + /* TODO: add a new member to accomandate additional fault status/cntl reg */ + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_PROTECTION_FAULT_STATUS_LO32); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), + regMMVM_L2_PROTECTION_FAULT_CNTL_LO32); + + hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ - + regMMVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + + hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + hub->vm_l2_bank_select_reserved_cid2 = + SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_BANK_SELECT_RESERVED_CID2); + + hub->vm_contexts_disable = + SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), regMMVM_CONTEXTS_DISABLE); + + hub->vmhub_funcs = &mmhub_v4_2_0_vmhub_funcs; + } +} + +static void mmhub_v4_2_0_init(struct amdgpu_device *adev) +{ + uint32_t mid_mask; + + mid_mask = adev->aid_mask; + mmhub_v4_2_0_mid_init(adev, mid_mask); +} + +static void +mmhub_v4_2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + uint32_t def1, data1, def2 = 0, data2 = 0; + def = data = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regMM_ATC_L2_MISC_CG); + def1 = data1 = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regDAGB0_CNTL_MISC2); + def2 = data2 = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regDAGB1_CNTL_MISC2); + + if (enable) { + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; + data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK); + + data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK); + } else { + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; + data1 |= (DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK); + + data2 |= (DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK); + } + + if (def != data) + WREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regMM_ATC_L2_MISC_CG, data); + if (def1 != data1) + WREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regDAGB0_CNTL_MISC2, data1); + + if (def2 != data2) + WREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regDAGB1_CNTL_MISC2, data2); +} + +static void +mmhub_v4_2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regMM_ATC_L2_MISC_CG, data); +} + +static int mmhub_v4_2_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG) + mmhub_v4_2_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + + if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) + mmhub_v4_2_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); + + return 0; +} + +static void mmhub_v4_2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) +{ + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + data = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regMM_ATC_L2_MISC_CG); + + /* AMD_CG_SUPPORT_MC_MGCG */ + if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; +} + +const struct amdgpu_mmhub_funcs mmhub_v4_2_0_funcs = { + .init = mmhub_v4_2_0_init, + .get_fb_location = mmhub_v4_2_0_get_fb_location, + .get_mc_fb_offset = mmhub_v4_2_0_get_mc_fb_offset, + .setup_vm_pt_regs = mmhub_v4_2_0_setup_vm_pt_regs, + .gart_enable = mmhub_v4_2_0_gart_enable, + .gart_disable = mmhub_v4_2_0_gart_disable, + .set_fault_enable_default = mmhub_v4_2_0_set_fault_enable_default, + .set_clockgating = mmhub_v4_2_0_set_clockgating, + .get_clockgating = mmhub_v4_2_0_get_clockgating, +}; + +static int mmhub_v4_2_0_xcp_resume(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool value; + + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) + value = false; + else + value = true; + + mmhub_v4_2_0_mid_set_fault_enable_default(adev, value, inst_mask); + + if (!amdgpu_sriov_vf(adev)) + return mmhub_v4_2_0_mid_gart_enable(adev, inst_mask); + + return 0; +} + +static int mmhub_v4_2_0_xcp_suspend(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!amdgpu_sriov_vf(adev)) + mmhub_v4_2_0_mid_gart_disable(adev, inst_mask); + + return 0; +} + +struct amdgpu_xcp_ip_funcs mmhub_v4_2_0_xcp_funcs = { + .suspend = &mmhub_v4_2_0_xcp_suspend, + .resume = &mmhub_v4_2_0_xcp_resume +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.h new file mode 100644 index 000000000000..4ea6de7ac7ae --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.h @@ -0,0 +1,28 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __MMHUB_V4_2_0_H__ +#define __MMHUB_V4_2_0_H__ + +extern const struct amdgpu_mmhub_funcs mmhub_v4_2_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index e7cd07383d56..f2e456390b27 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -96,7 +96,9 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev) timeout -= 5; } while (timeout > 1); - dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec \n", NV_MAILBOX_POLL_ACK_TIMEDOUT); + dev_err(adev->dev, + "Doesn't get TRN_MSG_ACK from pf in %d msec\n", + NV_MAILBOX_POLL_ACK_TIMEDOUT); return -ETIME; } @@ -209,6 +211,9 @@ send_request: case IDH_REQ_RAS_CHK_CRITI: event = IDH_REQ_RAS_CHK_CRITI_READY; break; + case IDH_REQ_RAS_REMOTE_CMD: + event = IDH_REQ_RAS_REMOTE_CMD_READY; + break; default: break; } @@ -585,6 +590,13 @@ static int xgpu_nv_check_vf_critical_region(struct amdgpu_device *adev, u64 addr adev, IDH_REQ_RAS_CHK_CRITI, addr_hi, addr_lo, 0); } +static int xgpu_nv_req_remote_ras_cmd(struct amdgpu_device *adev, + u32 param1, u32 param2, u32 param3) +{ + return xgpu_nv_send_access_requests_with_param( + adev, IDH_REQ_RAS_REMOTE_CMD, param1, param2, param3); +} + const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_full_gpu = xgpu_nv_request_full_gpu_access, .rel_full_gpu = xgpu_nv_release_full_gpu_access, @@ -598,5 +610,6 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_ras_err_count = xgpu_nv_req_ras_err_count, .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump, .req_bad_pages = xgpu_nv_req_ras_bad_pages, - .req_ras_chk_criti = xgpu_nv_check_vf_critical_region + .req_ras_chk_criti = xgpu_nv_check_vf_critical_region, + .req_remote_ras_cmd = xgpu_nv_req_remote_ras_cmd, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index c1083e5e41e0..dc57a4f697ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -43,7 +43,8 @@ enum idh_request { IDH_REQ_RAS_ERROR_COUNT = 203, IDH_REQ_RAS_CPER_DUMP = 204, IDH_REQ_RAS_BAD_PAGES = 205, - IDH_REQ_RAS_CHK_CRITI = 206 + IDH_REQ_RAS_CHK_CRITI = 206, + IDH_REQ_RAS_REMOTE_CMD = 207, }; enum idh_event { @@ -64,6 +65,7 @@ enum idh_event { IDH_RAS_BAD_PAGES_NOTIFICATION = 16, IDH_UNRECOV_ERR_NOTIFICATION = 17, IDH_REQ_RAS_CHK_CRITI_READY = 18, + IDH_REQ_RAS_REMOTE_CMD_READY = 19, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c index 9b4025c39e44..db14a1a326d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c @@ -30,6 +30,31 @@ #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include <uapi/linux/kfd_ioctl.h> +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL_nbif_4_10 0x4f0aeb +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL_nbif_4_10_BASE_IDX 3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL1_nbif_4_10 0x4f0aec +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL1_nbif_4_10_BASE_IDX 3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL_nbif_4_10 0x4f0aed +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL_nbif_4_10_BASE_IDX 3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL1_nbif_4_10 0x4f0aee +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL1_nbif_4_10_BASE_IDX 3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL_nbif_4_10 0x4f0aef +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL_nbif_4_10_BASE_IDX 3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL1_nbif_4_10 0x4f0af0 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL1_nbif_4_10_BASE_IDX 3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL_nbif_4_10 0x4f0af1 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL_nbif_4_10_BASE_IDX 3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL1_nbif_4_10 0x4f0af2 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL1_nbif_4_10_BASE_IDX 3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL_nbif_4_10 0x4f0af3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL_nbif_4_10_BASE_IDX 3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL1_nbif_4_10 0x4f0af4 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL1_nbif_4_10_BASE_IDX 3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL_nbif_4_10 0x4f0af5 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL_nbif_4_10_BASE_IDX 3 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL1_nbif_4_10 0x4f0af6 +#define regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL1_nbif_4_10_BASE_IDX 3 + static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, @@ -98,7 +123,11 @@ static void nbif_v6_3_1_sdma_doorbell_range(struct amdgpu_device *adev, S2A_DOORBELL_PORT2_RANGE_SIZE, 0); - WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, doorbell_range); + if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) { + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL_nbif_4_10, doorbell_range); + } else { + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, doorbell_range); + } } } @@ -140,16 +169,28 @@ static void nbif_v6_3_1_vcn_doorbell_range(struct amdgpu_device *adev, S2A_DOORBELL_PORT4_RANGE_SIZE, 0); - if (instance) - WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL, doorbell_range); - else - WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, doorbell_range); + if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) { + if (instance) + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL_nbif_4_10, doorbell_range); + else + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL_nbif_4_10, doorbell_range); + } else { + if (instance) + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL, doorbell_range); + else + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, doorbell_range); + } } static void nbif_v6_3_1_gc_doorbell_init(struct amdgpu_device *adev) { - WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL, 0x30000007); - WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d); + if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) { + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL_nbif_4_10, 0x30000007); + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL_nbif_4_10, 0x3000000d); + } else { + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL, 0x30000007); + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d); + } } static void nbif_v6_3_1_enable_doorbell_aperture(struct amdgpu_device *adev, @@ -214,7 +255,11 @@ static void nbif_v6_3_1_ih_doorbell_range(struct amdgpu_device *adev, S2A_DOORBELL_PORT1_RANGE_SIZE, 0); - WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range); + if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) { + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL_nbif_4_10, ih_doorbell_range); + } else { + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range); + } } static void nbif_v6_3_1_ih_control(struct amdgpu_device *adev) @@ -269,12 +314,20 @@ static u32 nbif_v6_3_1_get_hdp_flush_done_offset(struct amdgpu_device *adev) static u32 nbif_v6_3_1_get_pcie_index_offset(struct amdgpu_device *adev) { - return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX); + if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) { + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX); + } + else { + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX); + } } static u32 nbif_v6_3_1_get_pcie_data_offset(struct amdgpu_device *adev) { - return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA); + if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA); + else + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA); } const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index bed5ef4d8788..05cbd65420d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -279,6 +279,7 @@ static void nbio_v7_11_init_registers(struct amdgpu_device *adev) case IP_VERSION(7, 11, 1): case IP_VERSION(7, 11, 2): case IP_VERSION(7, 11, 3): + case IP_VERSION(7, 11, 4): data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23); WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data); break; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 50e77d9b30af..f17c3839aea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -593,10 +593,6 @@ static void nv_init_doorbell_index(struct amdgpu_device *adev) adev->doorbell_index.sdma_doorbell_range = 20; } -static void nv_pre_asic_init(struct amdgpu_device *adev) -{ -} - static int nv_update_umd_stable_pstate(struct amdgpu_device *adev, bool enter) { @@ -630,7 +626,6 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = { .need_reset_on_init = &nv_need_reset_on_init, .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count, .supports_baco = &amdgpu_dpm_is_baco_supported, - .pre_asic_init = &nv_pre_asic_init, .update_umd_stable_pstate = &nv_update_umd_stable_pstate, .query_video_codecs = &nv_query_video_codecs, }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 73f87131a7e9..e8f768638fd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -299,6 +299,8 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_RS64_MEC_P1_STACK = 95, /* RS64 MEC stack P1 SOC21 */ GFX_FW_TYPE_RS64_MEC_P2_STACK = 96, /* RS64 MEC stack P2 SOC21 */ GFX_FW_TYPE_RS64_MEC_P3_STACK = 97, /* RS64 MEC stack P3 SOC21 */ + GFX_FW_TYPE_RLX6_UCODE_CORE1 = 98, /* RLCV_IRAM MI */ + GFX_FW_TYPE_RLX6_DRAM_BOOT_CORE1 = 99, /* RLCV DRAM BOOT MI */ GFX_FW_TYPE_VPEC_FW1 = 100, /* VPEC FW1 To Save VPE */ GFX_FW_TYPE_VPEC_FW2 = 101, /* VPEC FW2 To Save VPE */ GFX_FW_TYPE_VPE = 102, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 3584b8c18fd9..8a6431487ed0 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -140,7 +140,7 @@ static int psp_v10_0_ring_destroy(struct psp_context *psp, static int psp_v10_0_mode1_reset(struct psp_context *psp) { - DRM_INFO("psp mode 1 reset not supported now! \n"); + drm_info(adev_to_drm(psp->adev), "psp mode 1 reset not supported now!\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index a9be7a505026..9aa988982304 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -412,7 +412,7 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) MBOX_TOS_READY_MASK, 0); if (ret) { - DRM_INFO("psp is not working correctly before mode1 reset!\n"); + drm_info(adev_to_drm(adev), "psp is not working correctly before mode1 reset!\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 4c6450d62299..c3cae29eeca1 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -225,7 +225,7 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp) MBOX_TOS_READY_MASK, 0); if (ret) { - DRM_INFO("psp is not working correctly before mode1 reset!\n"); + drm_info(adev_to_drm(adev), "psp is not working correctly before mode1 reset!\n"); return -EINVAL; } @@ -240,11 +240,11 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp) 0); if (ret) { - DRM_INFO("psp mode 1 reset failed!\n"); + drm_info(adev_to_drm(adev), "psp mode 1 reset failed!\n"); return -EINVAL; } - DRM_INFO("psp mode1 reset succeed \n"); + drm_info(adev_to_drm(adev), "psp mode1 reset succeed\n"); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c new file mode 100644 index 000000000000..3aca293e2f0c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c @@ -0,0 +1,202 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <drm/drm_drv.h> +#include <linux/vmalloc.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v15_0.h" + +#include "mp/mp_15_0_0_offset.h" +#include "mp/mp_15_0_0_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/psp_15_0_0_toc.bin"); + +static int psp_v15_0_0_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + char ucode_prefix[30]; + int err = 0; + + amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix)); + + err = psp_init_toc_microcode(psp, ucode_prefix); + if (err) + return err; + + return 0; +} + +static int psp_v15_0_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } + + return ret; +} + +static int psp_v15_0_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + ret = psp_v15_0_0_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v14_0_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Wait for sOS ready for ring creation */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + if (ret) { + DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v15_0_0_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v15_0_0_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static uint32_t psp_v15_0_0_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) + data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67); + + return data; +} + +static void psp_v15_0_0_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value); +} + +static const struct psp_funcs psp_v15_0_0_funcs = { + .init_microcode = psp_v15_0_0_init_microcode, + .ring_create = psp_v15_0_0_ring_create, + .ring_stop = psp_v15_0_0_ring_stop, + .ring_destroy = psp_v15_0_0_ring_destroy, + .ring_get_wptr = psp_v15_0_0_ring_get_wptr, + .ring_set_wptr = psp_v15_0_0_ring_set_wptr, +}; + +void psp_v15_0_0_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v15_0_0_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.h new file mode 100644 index 000000000000..ebd612103526 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V15_0_0_H__ +#define __PSP_V15_0_0_H__ + +#include "amdgpu_psp.h" + +void psp_v15_0_0_set_psp_funcs(struct psp_context *psp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.c new file mode 100644 index 000000000000..5249f5bd2a10 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.c @@ -0,0 +1,342 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <drm/drm_drv.h> +#include <linux/vmalloc.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v15_0_8.h" + +#include "mp/mp_15_0_8_offset.h" +#include "mp/mp_15_0_8_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/psp_15_0_8_toc.bin"); + +static int psp_v15_0_8_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + char ucode_prefix[30]; + int err = 0; + + amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix)); + + err = psp_init_toc_microcode(psp, ucode_prefix); + if (err) + return err; + + return 0; +} + +static int psp_v15_0_8_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } + + return ret; +} + +static int psp_v15_0_8_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + ret = psp_v15_0_8_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v14_0_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Wait for sOS ready for ring creation */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + if (ret) { + DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v15_0_8_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v15_0_8_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static uint32_t psp_v15_0_8_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) + data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67); + + return data; +} + +static void psp_v15_0_8_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value); +} + +static int psp_v15_0_8_get_fw_type(struct amdgpu_firmware_info *ucode, + enum psp_gfx_fw_type *type) +{ + switch (ucode->ucode_id) { + case AMDGPU_UCODE_ID_CAP: + *type = GFX_FW_TYPE_CAP; + break; + case AMDGPU_UCODE_ID_SDMA0: + *type = GFX_FW_TYPE_SDMA0; + break; + case AMDGPU_UCODE_ID_SDMA1: + *type = GFX_FW_TYPE_SDMA1; + break; + case AMDGPU_UCODE_ID_SDMA2: + *type = GFX_FW_TYPE_SDMA2; + break; + case AMDGPU_UCODE_ID_SDMA3: + *type = GFX_FW_TYPE_SDMA3; + break; + case AMDGPU_UCODE_ID_SDMA4: + *type = GFX_FW_TYPE_SDMA4; + break; + case AMDGPU_UCODE_ID_SDMA5: + *type = GFX_FW_TYPE_SDMA5; + break; + case AMDGPU_UCODE_ID_SDMA6: + *type = GFX_FW_TYPE_SDMA6; + break; + case AMDGPU_UCODE_ID_SDMA7: + *type = GFX_FW_TYPE_SDMA7; + break; + case AMDGPU_UCODE_ID_CP_MES: + *type = GFX_FW_TYPE_RS64_MES; + break; + case AMDGPU_UCODE_ID_CP_MES_DATA: + *type = GFX_FW_TYPE_RS64_MES_STACK; + break; + case AMDGPU_UCODE_ID_CP_MES1: + *type = GFX_FW_TYPE_RS64_KIQ; + break; + case AMDGPU_UCODE_ID_CP_MES1_DATA: + *type = GFX_FW_TYPE_RS64_KIQ_STACK; + break; + case AMDGPU_UCODE_ID_RLC_P: + *type = GFX_FW_TYPE_RLC_P; + break; + case AMDGPU_UCODE_ID_RLC_V: + *type = GFX_FW_TYPE_RLC_V; + break; + case AMDGPU_UCODE_ID_RLC_G: + *type = GFX_FW_TYPE_RLC_G; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM; + break; + case AMDGPU_UCODE_ID_RLC_IRAM: + *type = GFX_FW_TYPE_RLC_IRAM; + break; + case AMDGPU_UCODE_ID_RLC_DRAM: + *type = GFX_FW_TYPE_RLC_DRAM_BOOT; + break; + case AMDGPU_UCODE_ID_RLC_IRAM_1: + *type = GFX_FW_TYPE_RLX6_UCODE_CORE1; + break; + case AMDGPU_UCODE_ID_RLC_DRAM_1: + *type = GFX_FW_TYPE_RLX6_DRAM_BOOT_CORE1; + break; + case AMDGPU_UCODE_ID_SMC: + *type = GFX_FW_TYPE_SMU; + break; + case AMDGPU_UCODE_ID_PPTABLE: + *type = GFX_FW_TYPE_PPTABLE; + break; + case AMDGPU_UCODE_ID_VCN: + *type = GFX_FW_TYPE_VCN; + break; + case AMDGPU_UCODE_ID_VCN1: + *type = GFX_FW_TYPE_VCN1; + break; + case AMDGPU_UCODE_ID_VCN0_RAM: + *type = GFX_FW_TYPE_VCN0_RAM; + break; + case AMDGPU_UCODE_ID_VCN1_RAM: + *type = GFX_FW_TYPE_VCN1_RAM; + break; + case AMDGPU_UCODE_ID_SDMA_UCODE_TH0: + case AMDGPU_UCODE_ID_SDMA_RS64: + *type = GFX_FW_TYPE_SDMA0; + break; + case AMDGPU_UCODE_ID_SDMA_UCODE_TH1: + *type = GFX_FW_TYPE_SDMA_UCODE_TH1; + break; + case AMDGPU_UCODE_ID_IMU_I: + *type = GFX_FW_TYPE_IMU_I; + break; + case AMDGPU_UCODE_ID_IMU_D: + *type = GFX_FW_TYPE_IMU_D; + break; + case AMDGPU_UCODE_ID_CP_RS64_MEC: + *type = GFX_FW_TYPE_RS64_MEC; + break; + case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK: + *type = GFX_FW_TYPE_RS64_MEC_P0_STACK; + break; + case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK: + *type = GFX_FW_TYPE_RS64_MEC_P1_STACK; + break; + case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK: + *type = GFX_FW_TYPE_RS64_MEC_P2_STACK; + break; + case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK: + *type = GFX_FW_TYPE_RS64_MEC_P3_STACK; + break; + case AMDGPU_UCODE_ID_UMSCH_MM_UCODE: + *type = GFX_FW_TYPE_UMSCH_UCODE; + break; + case AMDGPU_UCODE_ID_UMSCH_MM_DATA: + *type = GFX_FW_TYPE_UMSCH_DATA; + break; + case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER: + *type = GFX_FW_TYPE_UMSCH_CMD_BUFFER; + break; + case AMDGPU_UCODE_ID_P2S_TABLE: + *type = GFX_FW_TYPE_P2S_TABLE; + break; + case AMDGPU_UCODE_ID_MAXIMUM: + default: + return -EINVAL; + } + + return 0; +} + +static const struct psp_funcs psp_v15_0_8_funcs = { + .init_microcode = psp_v15_0_8_init_microcode, + .ring_create = psp_v15_0_8_ring_create, + .ring_stop = psp_v15_0_8_ring_stop, + .ring_destroy = psp_v15_0_8_ring_destroy, + .ring_get_wptr = psp_v15_0_8_ring_get_wptr, + .ring_set_wptr = psp_v15_0_8_ring_set_wptr, + .get_fw_type = psp_v15_0_8_get_fw_type, +}; + +void psp_v15_0_8_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v15_0_8_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.h b/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.h new file mode 100644 index 000000000000..6bb1bb517007 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.h @@ -0,0 +1,30 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V15_0_8_H__ +#define __PSP_V15_0_8_H__ + +#include "amdgpu_psp.h" + +void psp_v15_0_8_set_psp_funcs(struct psp_context *psp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 833830bc3e2e..f5030efc6c80 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -315,7 +315,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, 0); if (ret) { - DRM_INFO("psp is not working correctly before mode1 reset!\n"); + drm_info(adev_to_drm(adev), "psp is not working correctly before mode1 reset!\n"); return -EINVAL; } @@ -329,11 +329,11 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, 0); if (ret) { - DRM_INFO("psp mode 1 reset failed!\n"); + drm_info(adev_to_drm(adev), "psp mode 1 reset failed!\n"); return -EINVAL; } - DRM_INFO("psp mode1 reset succeed \n"); + drm_info(adev_to_drm(adev), "psp mode1 reset succeed\n"); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 8ddc4df06a1f..7811cbb1f7ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1031,7 +1031,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 20); if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); + drm_err(adev_to_drm(adev), "dma failed to lock ring %d (%d).\n", ring->idx, r); amdgpu_device_wb_free(adev, index); return r; } @@ -1096,7 +1096,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r); goto err0; } @@ -1117,11 +1117,11 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); + drm_err(adev_to_drm(adev), "IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + drm_err(adev_to_drm(adev), "fence wait failed (%ld).\n", r); goto err1; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 51101b0aa2fa..dbe5b8f109f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -931,7 +931,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 20); if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); + drm_err(adev_to_drm(adev), "dma failed to lock ring %d (%d).\n", ring->idx, r); amdgpu_device_wb_free(adev, index); return r; } @@ -995,7 +995,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r); goto err0; } @@ -1016,11 +1016,11 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); + drm_err(adev_to_drm(adev), "IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + drm_err(adev_to_drm(adev), "fence wait failed (%ld).\n", r); goto err1; } @@ -1325,8 +1325,8 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) ring->use_doorbell = true; ring->me = i; - DRM_INFO("use_doorbell being set to: [%s]\n", - ring->use_doorbell?"true":"false"); + drm_info(adev_to_drm(adev), "use_doorbell being set to: [%s]\n", + ring->use_doorbell?"true":"false"); ring->doorbell_index = (adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 217040044987..eec659194718 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -54,6 +54,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_1_3.bin"); +MODULE_FIRMWARE("amdgpu/sdma_6_1_4.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 @@ -937,7 +938,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); + drm_err(adev_to_drm(adev), "dma failed to lock ring %d (%d).\n", ring->idx, r); amdgpu_device_wb_free(adev, index); return r; } @@ -1001,7 +1002,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r); goto err0; } @@ -1022,11 +1023,11 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); + drm_err(adev_to_drm(adev), "IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + drm_err(adev_to_drm(adev), "fence wait failed (%ld).\n", r); goto err1; } @@ -1268,6 +1269,17 @@ static void sdma_v6_0_set_ras_funcs(struct amdgpu_device *adev) } } +/* all sizes are in bytes */ +#define SDMA6_CSA_SIZE 32 +#define SDMA6_CSA_ALIGNMENT 4 + +static void sdma_v6_0_get_csa_info(struct amdgpu_device *adev, + struct amdgpu_sdma_csa_info *csa_info) +{ + csa_info->size = SDMA6_CSA_SIZE; + csa_info->alignment = SDMA6_CSA_ALIGNMENT; +} + static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1300,6 +1312,7 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block) sdma_v6_0_set_irq_funcs(adev); sdma_v6_0_set_mqd_funcs(adev); sdma_v6_0_set_ras_funcs(adev); + adev->sdma.get_csa_info = &sdma_v6_0_get_csa_info; return 0; } @@ -1586,7 +1599,7 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, amdgpu_ring_reset_helper_begin(ring, timedout_fence); - r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); + r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 2b81344dcd66..8d16ef257bcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -816,7 +816,7 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, amdgpu_ring_reset_helper_begin(ring, timedout_fence); - r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); + r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0); if (r) return r; @@ -954,7 +954,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); + drm_err(adev_to_drm(adev), "dma failed to lock ring %d (%d).\n", ring->idx, r); amdgpu_device_wb_free(adev, index); return r; } @@ -1018,7 +1018,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r); goto err0; } @@ -1039,11 +1039,11 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); + drm_err(adev_to_drm(adev), "IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + drm_err(adev_to_drm(adev), "fence wait failed (%ld).\n", r); goto err1; } @@ -1253,6 +1253,17 @@ static void sdma_v7_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } +/* all sizes are in bytes */ +#define SDMA7_CSA_SIZE 32 +#define SDMA7_CSA_ALIGNMENT 4 + +static void sdma_v7_0_get_csa_info(struct amdgpu_device *adev, + struct amdgpu_sdma_csa_info *csa_info) +{ + csa_info->size = SDMA7_CSA_SIZE; + csa_info->alignment = SDMA7_CSA_ALIGNMENT; +} + static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1286,6 +1297,7 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block) sdma_v7_0_set_vm_pte_funcs(adev); sdma_v7_0_set_irq_funcs(adev); sdma_v7_0_set_mqd_funcs(adev); + adev->sdma.get_csa_info = &sdma_v7_0_get_csa_info; return 0; } @@ -1492,7 +1504,7 @@ static int sdma_v7_0_ring_preempt_ib(struct amdgpu_ring *ring) ring->trail_seq += 1; r = amdgpu_ring_alloc(ring, 10); if (r) { - DRM_ERROR("ring %d failed to be allocated \n", ring->idx); + DRM_ERROR("ring %d failed to be allocated\n", ring->idx); return r; } sdma_v7_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c new file mode 100644 index 000000000000..5bc45c3e00d1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c @@ -0,0 +1,1817 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_ucode.h" +#include "amdgpu_trace.h" + +#include "gc/gc_12_1_0_offset.h" +#include "gc/gc_12_1_0_sh_mask.h" +#include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h" + +#include "soc15_common.h" +#include "soc15.h" +#include "sdma_v7_1_0_pkt_open.h" +#include "nbio_v4_3.h" +#include "sdma_common.h" +#include "sdma_v7_1.h" +#include "v12_structs.h" +#include "mes_userqueue.h" +#include "soc_v1_0.h" + +MODULE_FIRMWARE("amdgpu/sdma_7_1_0.bin"); + +#define SDMA1_REG_OFFSET 0x600 +#define SDMA0_SDMA_IDX_0_END 0x450 +#define SDMA1_HYP_DEC_REG_OFFSET 0x30 + +static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_1[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS4_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS5_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS6_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UCODE_REV), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_CHICKEN_BITS), +}; + +static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev); +static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev); +static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev); +static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v7_1_inst_start(struct amdgpu_device *adev, + uint32_t inst_mask); + +static u32 sdma_v7_1_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) +{ + u32 base; + u32 dev_inst = GET_INST(SDMA0, instance); + int xcc_id = adev->sdma.instance[instance].xcc_id; + int xcc_inst = dev_inst % adev->sdma.num_inst_per_xcc; + + if (internal_offset >= SDMA0_SDMA_IDX_0_END) { + base = adev->reg_offset[GC_HWIP][xcc_id][1]; + if (xcc_inst != 0) + internal_offset += SDMA1_HYP_DEC_REG_OFFSET * xcc_inst; + } else { + base = adev->reg_offset[GC_HWIP][xcc_id][0]; + if (xcc_inst != 0) + internal_offset += SDMA1_REG_OFFSET * xcc_inst; + } + + return base + internal_offset; +} + +static unsigned sdma_v7_1_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) +{ + unsigned ret; + + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, 1); + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); + + return ret; +} + +/** + * sdma_v7_1_ring_get_rptr - get the current read pointer + * + * @ring: amdgpu ring pointer + * + * Get the current rptr from the hardware. + */ +static uint64_t sdma_v7_1_ring_get_rptr(struct amdgpu_ring *ring) +{ + u64 *rptr; + + /* XXX check if swapping is necessary on BE */ + rptr = (u64 *)ring->rptr_cpu_addr; + + DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); + return ((*rptr) >> 2); +} + +/** + * sdma_v7_1_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware. + */ +static uint64_t sdma_v7_1_ring_get_wptr(struct amdgpu_ring *ring) +{ + u64 wptr = 0; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr)); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); + } + + return wptr >> 2; +} + +/** + * sdma_v7_1_ring_set_wptr - commit the write pointer + * + * @ring: amdgpu ring pointer + * + * Write the wptr back to the hardware. + */ +static void sdma_v7_1_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + DRM_DEBUG("Setting write pointer\n"); + + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, + ring->me, + regSDMA0_SDMA_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, + ring->me, + regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } +} + +static void sdma_v7_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->funcs->nop | + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->funcs->nop); +} + +/** + * sdma_v7_1_ring_emit_ib - Schedule an IB on the DMA engine + * + * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from + * @ib: IB object to schedule + * @flags: unused + * + * Schedule an IB in the DMA ring. + */ +static void sdma_v7_1_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); + + /* An IB packet must end on a 8 DW boundary--the next dword + * must be on a 8-dword boundary. Our IB packet below is 6 + * dwords long, thus add x number of NOPs, such that, in + * modular arithmetic, + * wptr + 6 + x = 8k, k >= 0, which in C is, + * (wptr + 6 + x) % 8 = 0. + * The expression below, is a solution of x. + */ + sdma_v7_1_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); + + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) | + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); + /* base must be 32 byte aligned */ + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); + amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr)); + amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr)); +} + +/** + * sdma_v7_1_ring_emit_mem_sync - flush the IB by graphics cache rinse + * + * @ring: amdgpu ring pointer + * + * flush the IB by graphics cache rinse. + */ +static void sdma_v7_1_ring_emit_mem_sync(struct amdgpu_ring *ring) +{ + uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV | + SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV | + SDMA_GCR_GLI_INV(1); + + /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */ + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_56_32(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_0(gcr_cntl) | + SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_15_7(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_16(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD5_LIMIT_VA_56_48(0) | + SDMA_PKT_GCR_REQ_PAYLOAD5_VMID(0)); +} + + +/** + * sdma_v7_1_ring_emit_fence - emit a fence on the DMA ring + * + * @ring: amdgpu ring pointer + * @addr: address + * @seq: fence seq number + * @flags: fence flags + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed. + */ +static void sdma_v7_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + /* write the fence */ + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) | + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + /* optionally write high bits as well */ + if (write64bit) { + addr += 4; + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) | + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + } + + if (flags & AMDGPU_FENCE_FLAG_INT) { + /* generate an interrupt */ + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); + } +} + +/** + * sdma_v7_1_inst_gfx_stop - stop the gfx async dma engines + * + * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be disabled + * + * Stop the gfx async dma ring buffers. + */ +static void sdma_v7_1_inst_gfx_stop(struct amdgpu_device *adev, + uint32_t inst_mask) +{ + u32 rb_cntl, ib_cntl; + int i; + + for_each_inst(i, inst_mask) { + rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 0); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl); + ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 0); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl); + } +} + +/** + * sdma_v7_1_inst_rlc_stop - stop the compute async dma engines + * + * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be disabled + * + * Stop the compute async dma queues. + */ +static void sdma_v7_1_inst_rlc_stop(struct amdgpu_device *adev, + uint32_t inst_mask) +{ + /* XXX todo */ +} + +/** + * sdma_v7_1_inst_ctx_switch_enable - stop the async dma engines context switch + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs context switch. + * @inst_mask: mask of dma engine instances to be enabled + * + * Halt or unhalt the async dma engines context switch. + */ +static void sdma_v7_1_inst_ctx_switch_enable(struct amdgpu_device *adev, + bool enable, uint32_t inst_mask) +{ + int i; + + for_each_inst(i, inst_mask) { + WREG32_SOC15_IP(GC, + sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_TIMEOUT), 0x80); + } +} + +/** + * sdma_v7_1_inst_enable - stop the async dma engines + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs. + * @inst_mask: mask of dma engine instances to be enabled + * + * Halt or unhalt the async dma engines. + */ +static void sdma_v7_1_inst_enable(struct amdgpu_device *adev, + bool enable, uint32_t inst_mask) +{ + u32 mcu_cntl; + int i; + + if (!enable) { + sdma_v7_1_inst_gfx_stop(adev, inst_mask); + sdma_v7_1_inst_rlc_stop(adev, inst_mask); + } + + if (amdgpu_sriov_vf(adev)) + return; + + for_each_inst(i, inst_mask) { + mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL)); + mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_SDMA_MCU_CNTL, HALT, enable ? 0 : 1); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), mcu_cntl); + } +} + +/** + * sdma_v7_1_gfx_resume_instance - start/restart a certain sdma engine + * + * @adev: amdgpu_device pointer + * @i: instance + * @restore: used to restore wptr when restart + * + * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr. + * Return 0 for success. + */ +static int sdma_v7_1_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore) +{ + struct amdgpu_ring *ring; + u32 rb_cntl, ib_cntl; + u32 rb_bufsz; + u32 doorbell; + u32 doorbell_offset; + u32 temp; + u64 wptr_gpu_addr; + int r; + + ring = &adev->sdma.instance[i].ring; + + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); +#ifdef __BIG_ENDIAN + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); +#endif + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_PRIV, 1); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + if (restore) { + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + } else { + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), 0); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), 0); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), 0); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), 0); + } + /* setup the wptr shadow polling */ + wptr_gpu_addr = ring->wptr_gpu_addr; + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + if (amdgpu_sriov_vf(adev)) + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1); + else + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1); + + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE), ring->gpu_addr >> 8); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + + if (!restore) + ring->wptr = 0; + + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 1); + + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + } + + doorbell = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL)); + doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET)); + + if (ring->use_doorbell) { + doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1); + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + } else { + doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 0); + } + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL), doorbell); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET), doorbell_offset); + + if (i == 0) + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances); + + if (amdgpu_sriov_vf(adev)) + sdma_v7_1_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 0); + + /* Set up sdma hang watchdog */ + temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL)); + /* 100ms per unit */ + temp = REG_SET_FIELD(temp, SDMA0_SDMA_WATCHDOG_CNTL, QUEUE_HANG_COUNT, + max(adev->usec_timeout/100000, 1)); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL), temp); + + /* Set up RESP_MODE to non-copy addresses */ + temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, RESP_MODE, 3); + temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, REDO_DELAY, 9); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL), temp); + + /* program default cache read and write policy */ + temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE)); + /* clean read policy and write policy bits */ + temp &= 0xFF0FFF; + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | + (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE), temp); + + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, HALT, 0); + temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, RESET, 0); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), temp); + } + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 1); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl); + + ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 1); +#ifdef __BIG_ENDIAN + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1); +#endif + /* enable DMA IBs */ + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl); + ring->sched.ready = true; + + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ + sdma_v7_1_inst_ctx_switch_enable(adev, true, i); + sdma_v7_1_inst_enable(adev, true, i); + } + + r = amdgpu_ring_test_helper(ring); + if (r) + ring->sched.ready = false; + + return r; +} + +/** + * sdma_v7_1_inst_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be enabled + * + * Set up the gfx DMA ring buffers and enable them. + * Returns 0 for success, error for failure. + */ +static int sdma_v7_1_inst_gfx_resume(struct amdgpu_device *adev, + uint32_t inst_mask) +{ + int i, r; + + for_each_inst(i, inst_mask) { + r = sdma_v7_1_gfx_resume_instance(adev, i, false); + if (r) + return r; + } + + return 0; + +} + +/** + * sdma_v7_1_inst_rlc_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be enabled + * + * Set up the compute DMA queues and enable them. + * Returns 0 for success, error for failure. + */ +static int sdma_v7_1_inst_rlc_resume(struct amdgpu_device *adev, + uint32_t inst_mask) +{ + return 0; +} + +static void sdma_v7_1_inst_free_ucode_buffer(struct amdgpu_device *adev, + uint32_t inst_mask) +{ + int i; + + for_each_inst(i, inst_mask) { + amdgpu_bo_free_kernel(&adev->sdma.instance[i].sdma_fw_obj, + &adev->sdma.instance[i].sdma_fw_gpu_addr, + (void **)&adev->sdma.instance[i].sdma_fw_ptr); + } +} + +/** + * sdma_v7_1_inst_load_microcode - load the sDMA ME ucode + * + * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be enabled + * + * Loads the sDMA0/1 ucode. + * Returns 0 for success, -EINVAL if the ucode is not available. + */ +static int sdma_v7_1_inst_load_microcode(struct amdgpu_device *adev, + uint32_t inst_mask) +{ + const struct sdma_firmware_header_v3_0 *hdr; + const __le32 *fw_data; + u32 fw_size; + uint32_t tmp, sdma_status, ic_op_cntl; + int i, r, j; + + /* halt the MEs */ + sdma_v7_1_inst_enable(adev, false, inst_mask); + + if (!adev->sdma.instance[0].fw) + return -EINVAL; + + hdr = (const struct sdma_firmware_header_v3_0 *) + adev->sdma.instance[0].fw->data; + amdgpu_ucode_print_sdma_hdr(&hdr->header); + + fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data + + le32_to_cpu(hdr->ucode_offset_bytes)); + fw_size = le32_to_cpu(hdr->ucode_size_bytes); + + for_each_inst(i, inst_mask) { + r = amdgpu_bo_create_reserved(adev, fw_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->sdma.instance[i].sdma_fw_obj, + &adev->sdma.instance[i].sdma_fw_gpu_addr, + (void **)&adev->sdma.instance[i].sdma_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r); + return r; + } + + memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj); + amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj); + + tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL)); + tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_CNTL, GPA, 0); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL), tmp); + + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_LO), + lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr)); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_HI), + upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr)); + + tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL)); + tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_OP_CNTL, PRIME_ICACHE, 1); + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL), tmp); + + /* Wait for sdma ucode init complete */ + for (j = 0; j < adev->usec_timeout; j++) { + ic_op_cntl = RREG32_SOC15_IP(GC, + sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL)); + sdma_status = RREG32_SOC15_IP(GC, + sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG)); + if ((REG_GET_FIELD(ic_op_cntl, SDMA0_SDMA_IC_OP_CNTL, ICACHE_PRIMED) == 1) && + (REG_GET_FIELD(sdma_status, SDMA0_SDMA_STATUS_REG, UCODE_INIT_DONE) == 1)) + break; + udelay(1); + } + + if (j >= adev->usec_timeout) { + dev_err(adev->dev, "failed to init sdma ucode\n"); + return -EINVAL; + } + } + + return 0; +} + +static int sdma_v7_1_soft_reset(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + uint32_t inst_mask; + u32 tmp; + int i; + + inst_mask = GENMASK(NUM_XCC(adev->sdma.sdma_mask) - 1, 0); + sdma_v7_1_inst_gfx_stop(adev, inst_mask); + + for_each_inst(i, inst_mask) { + //tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE)); + //tmp |= SDMA0_SDMA_FREEZE__FREEZE_MASK; + //WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE), tmp); + tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL)); + tmp |= SDMA0_SDMA_MCU_CNTL__HALT_MASK; + tmp |= SDMA0_SDMA_MCU_CNTL__RESET_MASK; + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), tmp); + + WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_PREEMPT), 0); + + udelay(100); + + tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i; + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + + udelay(100); + + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0); + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + + udelay(100); + } + + return sdma_v7_1_inst_start(adev, inst_mask); +} + +static bool sdma_v7_1_check_soft_reset(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, r; + long tmo = msecs_to_jiffies(1000); + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + r = amdgpu_ring_test_ib(ring, tmo); + if (r) + return true; + } + + return false; +} + +static int sdma_v7_1_reset_queue(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + int r; + + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); + return -EINVAL; + } + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0); + if (r) + return r; + + r = sdma_v7_1_gfx_resume_instance(adev, ring->me, true); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + +/** + * sdma_v7_1_inst_start - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be enabled + * + * Set up the DMA engines and enable them. + * Returns 0 for success, error for failure. + */ +static int sdma_v7_1_inst_start(struct amdgpu_device *adev, + uint32_t inst_mask) +{ + int r = 0; + + if (amdgpu_sriov_vf(adev)) { + sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask); + sdma_v7_1_inst_enable(adev, false, inst_mask); + + /* set RB registers */ + r = sdma_v7_1_inst_gfx_resume(adev, inst_mask); + return r; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + r = sdma_v7_1_inst_load_microcode(adev, inst_mask); + if (r) { + sdma_v7_1_inst_free_ucode_buffer(adev, inst_mask); + return r; + } + + if (amdgpu_emu_mode == 1) + msleep(1000); + } + + /* unhalt the MEs */ + sdma_v7_1_inst_enable(adev, true, inst_mask); + /* enable sdma ring preemption */ + sdma_v7_1_inst_ctx_switch_enable(adev, true, inst_mask); + + /* start the gfx rings and rlc compute queues */ + r = sdma_v7_1_inst_gfx_resume(adev, inst_mask); + if (r) + return r; + r = sdma_v7_1_inst_rlc_resume(adev, inst_mask); + + return r; +} + +static int sdma_v7_1_mqd_init(struct amdgpu_device *adev, void *mqd, + struct amdgpu_mqd_prop *prop) +{ + struct v12_sdma_mqd *m = mqd; + uint64_t wb_gpu_addr; + + m->sdmax_rlcx_rb_cntl = + order_base_2(prop->queue_size / 4) << SDMA0_SDMA_QUEUE0_RB_CNTL__RB_SIZE__SHIFT | + 1 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | + 4 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT | + 1 << SDMA0_SDMA_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT; + + m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8); + m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8); + + wb_gpu_addr = prop->wptr_gpu_addr; + m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr); + m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr); + + wb_gpu_addr = prop->rptr_gpu_addr; + m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr); + m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr); + + m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, 0, + regSDMA0_SDMA_QUEUE0_IB_CNTL)); + + m->sdmax_rlcx_doorbell_offset = + prop->doorbell_index << SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; + + m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1); + + m->sdmax_rlcx_doorbell_log = 0; + m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_SDMA_QUEUE0_RB_AQL_CNTL_DEFAULT; + m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_SDMA_QUEUE0_DUMMY_REG_DEFAULT; + + m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); + m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + + return 0; +} + +static void sdma_v7_1_set_mqd_funcs(struct amdgpu_device *adev) +{ + adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd); + adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_1_mqd_init; +} + +/** + * sdma_v7_1_ring_test_ring - simple async dma engine test + * + * @ring: amdgpu_ring structure holding ring information + * + * Test the DMA engine by writing using it to write an + * value to memory. + * Returns 0 for success, error for failure. + */ +static int sdma_v7_1_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + unsigned i; + unsigned index; + int r; + u32 tmp; + u64 gpu_addr; + + tmp = 0xCAFEDEAD; + + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + + r = amdgpu_ring_alloc(ring, 5); + if (r) { + DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); + amdgpu_device_wb_free(adev, index); + return r; + } + + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); + amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = le32_to_cpu(adev->wb.wb[index]); + if (tmp == 0xDEADBEEF) + break; + if (amdgpu_emu_mode == 1) + msleep(1); + else + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + amdgpu_device_wb_free(adev, index); + + return r; +} + +/** + * sdma_v7_1_ring_test_ib - test an IB on the DMA engine + * + * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT + * + * Test a simple IB in the DMA ring. + * Returns 0 on success, error on failure. + */ +static int sdma_v7_1_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + unsigned index; + long r; + u32 tmp = 0; + u64 gpu_addr; + + tmp = 0xCAFEDEAD; + memset(&ib, 0, sizeof(ib)); + + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; + } + + ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib.ptr[1] = lower_32_bits(gpu_addr); + ib.ptr[2] = upper_32_bits(gpu_addr); + ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); + ib.ptr[4] = 0xDEADBEEF; + ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.length_dw = 8; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err1; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out\n"); + r = -ETIMEDOUT; + goto err1; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err1; + } + + tmp = le32_to_cpu(adev->wb.wb[index]); + + if (tmp == 0xDEADBEEF) + r = 0; + else + r = -EINVAL; + +err1: + amdgpu_ib_free(&ib, NULL); + dma_fence_put(f); +err0: + amdgpu_device_wb_free(adev, index); + return r; +} + + +/** + * sdma_v7_1_vm_copy_pte - update PTEs by copying them from the GART + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @src: src addr to copy from + * @count: number of page entries to update + * + * Update PTEs by copying them from the GART using sDMA. + */ +static void sdma_v7_1_vm_copy_pte(struct amdgpu_ib *ib, + uint64_t pe, uint64_t src, + unsigned count) +{ + unsigned bytes = count * 8; + + ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + + ib->ptr[ib->length_dw++] = bytes - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src); + ib->ptr[ib->length_dw++] = upper_32_bits(src); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + +} + +/** + * sdma_v7_1_vm_write_pte - update PTEs by writing them manually + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @value: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * + * Update PTEs by writing them manually using sDMA. + */ +static void sdma_v7_1_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, + uint64_t value, unsigned count, + uint32_t incr) +{ + unsigned ndw = count * 2; + + ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = ndw - 1; + for (; ndw > 0; ndw -= 2) { + ib->ptr[ib->length_dw++] = lower_32_bits(value); + ib->ptr[ib->length_dw++] = upper_32_bits(value); + value += incr; + } +} + +/** + * sdma_v7_1_vm_set_pte_pde - update the page tables using sDMA + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update the page tables using sDMA. + */ +static void sdma_v7_1_vm_set_pte_pde(struct amdgpu_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint64_t flags) +{ + /* for physically contiguous pages (vram) */ + u32 header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE); + + if (amdgpu_mtype_local) + header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3); + else + header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2) | + SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) | + SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3)); + + ib->ptr[ib->length_dw++] = header; + ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ + ib->ptr[ib->length_dw++] = upper_32_bits(flags); + ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = count - 1; /* number of entries */ +} + +/** + * sdma_v7_1_ring_pad_ib - pad the IB + * + * @ring: amdgpu ring pointer + * @ib: indirect buffer to fill with padding + * + * Pad the IB with NOPs to a boundary multiple of 8. + */ +static void sdma_v7_1_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) +{ + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); + u32 pad_count; + int i; + + pad_count = (-ib->length_dw) & 0x7; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) | + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP); +} + +/** + * sdma_v7_1_ring_emit_pipeline_sync - sync the pipeline + * + * @ring: amdgpu_ring pointer + * + * Make sure all previous operations are completed (CIK). + */ +static void sdma_v7_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + /* wait for idle */ + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ + SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + amdgpu_ring_write(ring, seq); /* reference */ + amdgpu_ring_write(ring, 0xffffffff); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ +} + +/** + * sdma_v7_1_ring_emit_vm_flush - vm flush using sDMA + * + * @ring: amdgpu_ring pointer + * @vmid: vmid number to use + * @pd_addr: address + * + * Update the page table base and flush the VM TLB + * using sDMA. + */ +static void sdma_v7_1_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); +} + +static void sdma_v7_1_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + /* SRBM WRITE command will not support on sdma v7. + * Use Register WRITE command instead, which OPCODE is same as SRBM WRITE + */ + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE)); + amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2); + amdgpu_ring_write(ring, val); +} + +static void sdma_v7_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ + amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); /* reference */ + amdgpu_ring_write(ring, mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); +} + +static void sdma_v7_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_wreg(ring, reg0, ref); + /* wait for a cycle to reset vm_inv_eng*_ack */ + amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); +} + +static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_sdma_init_microcode(adev, 0, true); + if (r) { + DRM_ERROR("Failed to init sdma firmware!\n"); + return r; + } + + sdma_v7_1_set_ring_funcs(adev); + sdma_v7_1_set_buffer_funcs(adev); + sdma_v7_1_set_vm_pte_funcs(adev); + sdma_v7_1_set_irq_funcs(adev); + sdma_v7_1_set_mqd_funcs(adev); + + return 0; +} + +static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_ring *ring; + int r, i; + struct amdgpu_device *adev = ip_block->adev; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1); + uint32_t *ptr; + u32 xcc_id; + + /* SDMA trap event */ + r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GFX, + GFX_12_1_0__SRCID__SDMA_TRAP, + &adev->sdma.trap_irq); + if (r) + return r; + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->me = i; + + for (xcc_id = 0; xcc_id < fls(adev->gfx.xcc_mask); xcc_id++) { + if (adev->sdma.instance[i].xcc_id == GET_INST(GC, xcc_id)) + break; + } + + DRM_DEBUG("SDMA%d.%d use_doorbell being set to: [%s]\n", + xcc_id, GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc, + ring->use_doorbell?"true":"false"); + + ring->doorbell_index = + (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset + + ring->vm_hub = AMDGPU_GFXHUB(xcc_id); + sprintf(ring->name, "sdma%d.%d", xcc_id, + GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc); + r = amdgpu_ring_init(adev, ring, 1024, + &adev->sdma.trap_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + } + + adev->sdma.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring); + if (!amdgpu_sriov_vf(adev) && + !adev->debug_disable_gpu_ring_reset) + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + + r = amdgpu_sdma_sysfs_reset_mask_init(adev); + if (r) + return r; + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; +#endif + + return r; +} + +static int sdma_v7_1_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) + amdgpu_ring_fini(&adev->sdma.instance[i].ring); + + amdgpu_sdma_sysfs_reset_mask_fini(adev); + amdgpu_sdma_destroy_inst_ctx(adev, true); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) + sdma_v7_1_inst_free_ucode_buffer(adev, adev->sdma.sdma_mask); + + kfree(adev->sdma.ip_dump); + + return 0; +} + +static int sdma_v7_1_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + uint32_t inst_mask; + + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); + + return sdma_v7_1_inst_start(adev, inst_mask); +} + +static int sdma_v7_1_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + if (amdgpu_sriov_vf(adev)) + return 0; + + sdma_v7_1_inst_ctx_switch_enable(adev, false, adev->sdma.sdma_mask); + sdma_v7_1_inst_enable(adev, false, adev->sdma.sdma_mask); + + return 0; +} + +static int sdma_v7_1_suspend(struct amdgpu_ip_block *ip_block) +{ + return sdma_v7_1_hw_fini(ip_block); +} + +static int sdma_v7_1_resume(struct amdgpu_ip_block *ip_block) +{ + return sdma_v7_1_hw_init(ip_block); +} + +static bool sdma_v7_1_is_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + u32 i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + u32 tmp = RREG32(sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG)); + + if (!(tmp & SDMA0_SDMA_STATUS_REG__IDLE_MASK)) + return false; + } + + return true; +} + +static int sdma_v7_1_wait_for_idle(struct amdgpu_ip_block *ip_block) +{ + unsigned i, j; + u32 sdma[AMDGPU_MAX_SDMA_INSTANCES]; + struct amdgpu_device *adev = ip_block->adev; + + for (i = 0; i < adev->usec_timeout; i++) { + for (j = 0; j < adev->sdma.num_instances; j++) { + sdma[j] = RREG32(sdma_v7_1_get_reg_offset(adev, + j, regSDMA0_SDMA_STATUS_REG)); + if (!(sdma[j] & SDMA0_SDMA_STATUS_REG__IDLE_MASK)) + break; + } + if (j == adev->sdma.num_instances) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static int sdma_v7_1_ring_preempt_ib(struct amdgpu_ring *ring) +{ + int i, r = 0; + struct amdgpu_device *adev = ring->adev; + u32 index = 0; + u64 sdma_gfx_preempt; + + amdgpu_sdma_get_index_from_ring(ring, &index); + sdma_gfx_preempt = + sdma_v7_1_get_reg_offset(adev, index, regSDMA0_SDMA_QUEUE0_PREEMPT); + + /* assert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, false); + + /* emit the trailing fence */ + ring->trail_seq += 1; + r = amdgpu_ring_alloc(ring, 10); + if (r) { + DRM_ERROR("ring %d failed to be allocated \n", ring->idx); + return r; + } + sdma_v7_1_ring_emit_fence(ring, ring->trail_fence_gpu_addr, + ring->trail_seq, 0); + amdgpu_ring_commit(ring); + + /* assert IB preemption */ + WREG32(sdma_gfx_preempt, 1); + + /* poll the trailing fence */ + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->trail_seq == + le32_to_cpu(*(ring->trail_fence_cpu_addr))) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + r = -EINVAL; + DRM_ERROR("ring %d failed to be preempted\n", ring->idx); + } + + /* deassert IB preemption */ + WREG32(sdma_gfx_preempt, 0); + + /* deassert the preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, true); + return r; +} + +static int sdma_v7_1_set_trap_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 sdma_cntl; + + u32 reg_offset = sdma_v7_1_get_reg_offset(adev, type, regSDMA0_SDMA_CNTL); + + sdma_cntl = RREG32(reg_offset); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_SDMA_CNTL, TRAP_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32(reg_offset, sdma_cntl); + + return 0; +} + +static int sdma_v7_1_process_trap_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + int inst, instances, queue, xcc_id = 0; + uint32_t mes_queue_id = entry->src_data[0]; + + DRM_DEBUG("IH: SDMA trap\n"); + + if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { + struct amdgpu_mes_queue *queue; + + mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; + + spin_lock(&adev->mes.queue_id_lock); + queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); + if (queue) { + DRM_DEBUG("process smda queue id = %d\n", mes_queue_id); + amdgpu_fence_process(queue->ring); + } + spin_unlock(&adev->mes.queue_id_lock); + return 0; + } + + queue = entry->ring_id & 0xf; + if (adev->gfx.funcs && adev->gfx.funcs->ih_node_to_logical_xcc) + xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, entry->node_id); + else + dev_warn(adev->dev, "IH: SDMA may get wrong xcc id as gfx function not available\n"); + inst = ((entry->ring_id & 0xf0) >> 4) + + GET_INST(GC, xcc_id) * adev->sdma.num_inst_per_xcc; + for (instances = 0; instances < adev->sdma.num_instances; instances++) { + if (inst == GET_INST(SDMA0, instances)) + break; + } + if (instances > adev->sdma.num_instances - 1) { + DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n"); + return -EINVAL; + } + + switch (entry->client_id) { + case SOC_V1_0_IH_CLIENTID_GFX: + switch (queue) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[instances].ring); + break; + default: + break; + } + break; + } + return 0; +} + +static int sdma_v7_1_process_illegal_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + return 0; +} + +static int sdma_v7_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + return 0; +} + +static int sdma_v7_1_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) +{ + return 0; +} + +static void sdma_v7_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, + u64 *flags) +{ +} + +static void sdma_v7_1_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_1[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v7_1_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v7_1_get_reg_offset(adev, i, + sdma_reg_list_7_1[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + +const struct amd_ip_funcs sdma_v7_1_ip_funcs = { + .name = "sdma_v7_1", + .early_init = sdma_v7_1_early_init, + .late_init = NULL, + .sw_init = sdma_v7_1_sw_init, + .sw_fini = sdma_v7_1_sw_fini, + .hw_init = sdma_v7_1_hw_init, + .hw_fini = sdma_v7_1_hw_fini, + .suspend = sdma_v7_1_suspend, + .resume = sdma_v7_1_resume, + .is_idle = sdma_v7_1_is_idle, + .wait_for_idle = sdma_v7_1_wait_for_idle, + .soft_reset = sdma_v7_1_soft_reset, + .check_soft_reset = sdma_v7_1_check_soft_reset, + .set_clockgating_state = sdma_v7_1_set_clockgating_state, + .set_powergating_state = sdma_v7_1_set_powergating_state, + .get_clockgating_state = sdma_v7_1_get_clockgating_state, + .dump_ip_state = sdma_v7_1_dump_ip_state, + .print_ip_state = sdma_v7_1_print_ip_state, +}; + +static const struct amdgpu_ring_funcs sdma_v7_1_ring_funcs = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .secure_submission_supported = true, + .get_rptr = sdma_v7_1_ring_get_rptr, + .get_wptr = sdma_v7_1_ring_get_wptr, + .set_wptr = sdma_v7_1_ring_set_wptr, + .emit_frame_size = + 5 + /* sdma_v7_1_ring_init_cond_exec */ + 6 + /* sdma_v7_1_ring_emit_pipeline_sync */ + /* sdma_v7_1_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v7_1_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 5 + 7 + 6, /* sdma_v7_1_ring_emit_ib */ + .emit_ib = sdma_v7_1_ring_emit_ib, + .emit_mem_sync = sdma_v7_1_ring_emit_mem_sync, + .emit_fence = sdma_v7_1_ring_emit_fence, + .emit_pipeline_sync = sdma_v7_1_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v7_1_ring_emit_vm_flush, + .test_ring = sdma_v7_1_ring_test_ring, + .test_ib = sdma_v7_1_ring_test_ib, + .insert_nop = sdma_v7_1_ring_insert_nop, + .pad_ib = sdma_v7_1_ring_pad_ib, + .emit_wreg = sdma_v7_1_ring_emit_wreg, + .emit_reg_wait = sdma_v7_1_ring_emit_reg_wait, + .emit_reg_write_reg_wait = sdma_v7_1_ring_emit_reg_write_reg_wait, + .init_cond_exec = sdma_v7_1_ring_init_cond_exec, + .preempt_ib = sdma_v7_1_ring_preempt_ib, + .reset = sdma_v7_1_reset_queue, +}; + +static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev) +{ + int i, dev_inst; + + for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.instance[i].ring.funcs = &sdma_v7_1_ring_funcs; + adev->sdma.instance[i].ring.me = i; + + dev_inst = GET_INST(SDMA0, i); + /* XCC to which SDMA belongs depends on physical instance */ + adev->sdma.instance[i].xcc_id = + dev_inst / adev->sdma.num_inst_per_xcc; + } +} + +static const struct amdgpu_irq_src_funcs sdma_v7_1_trap_irq_funcs = { + .set = sdma_v7_1_set_trap_irq_state, + .process = sdma_v7_1_process_trap_irq, +}; + +static const struct amdgpu_irq_src_funcs sdma_v7_1_illegal_inst_irq_funcs = { + .process = sdma_v7_1_process_illegal_inst_irq, +}; + +static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + + adev->sdma.num_instances; + adev->sdma.trap_irq.funcs = &sdma_v7_1_trap_irq_funcs; + adev->sdma.illegal_inst_irq.funcs = &sdma_v7_1_illegal_inst_irq_funcs; +} + +/** + * sdma_v7_1_emit_copy_buffer - copy buffer using the sDMA engine + * + * @ib: indirect buffer to fill with commands + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * @copy_flags: copy flags for the buffers + * + * Copy GPU buffers using the DMA engine. + * Used by the amdgpu ttm implementation to move pages if + * registered as the asic copy callback. + */ +static void sdma_v7_1_emit_copy_buffer(struct amdgpu_ib *ib, + uint64_t src_offset, + uint64_t dst_offset, + uint32_t byte_count, + uint32_t copy_flags) +{ + ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); + + ib->ptr[ib->length_dw++] = byte_count - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); +} + +/** + * sdma_v7_1_emit_fill_buffer - fill buffer using the sDMA engine + * + * @ib: indirect buffer to fill + * @src_data: value to write to buffer + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * + * Fill GPU buffers using the DMA engine. + */ +static void sdma_v7_1_emit_fill_buffer(struct amdgpu_ib *ib, + uint32_t src_data, + uint64_t dst_offset, + uint32_t byte_count) +{ + ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = src_data; + ib->ptr[ib->length_dw++] = byte_count - 1; +} + +static const struct amdgpu_buffer_funcs sdma_v7_1_buffer_funcs = { + .copy_max_bytes = 0x400000, + .copy_num_dw = 8, + .emit_copy_buffer = sdma_v7_1_emit_copy_buffer, + .fill_max_bytes = 0x400000, + .fill_num_dw = 5, + .emit_fill_buffer = sdma_v7_1_emit_fill_buffer, +}; + +static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev) +{ + adev->mman.buffer_funcs = &sdma_v7_1_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; +} + +static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = { + .copy_pte_num_dw = 8, + .copy_pte = sdma_v7_1_vm_copy_pte, + .write_pte = sdma_v7_1_vm_write_pte, + .set_pte_pde = sdma_v7_1_vm_set_pte_pde, +}; + +static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev) +{ + unsigned i; + + adev->vm_manager.vm_pte_funcs = &sdma_v7_1_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; + } + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; +} + +const struct amdgpu_ip_block_version sdma_v7_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 7, + .minor = 1, + .rev = 0, + .funcs = &sdma_v7_1_ip_funcs, +}; + +static int sdma_v7_1_xcp_resume(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = sdma_v7_1_inst_start(adev, inst_mask); + + return r; +} + +static int sdma_v7_1_xcp_suspend(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask); + sdma_v7_1_inst_enable(adev, false, inst_mask); + + return 0; +} + +struct amdgpu_xcp_ip_funcs sdma_v7_1_xcp_funcs = { + .suspend = &sdma_v7_1_xcp_suspend, + .resume = &sdma_v7_1_xcp_resume +}; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.h b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.h new file mode 100644 index 000000000000..1a07ef09a103 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.h @@ -0,0 +1,31 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __SDMA_V7_1_H__ +#define __SDMA_V7_1_H__ + +extern const struct amd_ip_funcs sdma_v7_1_ip_funcs; +extern const struct amdgpu_ip_block_version sdma_v7_1_ip_block; +extern struct amdgpu_xcp_ip_funcs sdma_v7_1_xcp_funcs; + +#endif /* __SDMA_V7_1_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1_0_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1_0_pkt_open.h new file mode 100644 index 000000000000..b051e4f92088 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1_0_pkt_open.h @@ -0,0 +1,5673 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SDMA_V7_1_0_PKT_OPEN_H_ +#define __SDMA_V7_1_0_PKT_OPEN_H_ + +#define SDMA_OP_NOP 0 +#define SDMA_OP_COPY 1 +#define SDMA_OP_WRITE 2 +#define SDMA_OP_INDIRECT 4 +#define SDMA_OP_FENCE 5 +#define SDMA_OP_TRAP 6 +#define SDMA_OP_SEM 7 +#define SDMA_OP_POLL_REGMEM 8 +#define SDMA_OP_COND_EXE 9 +#define SDMA_OP_ATOMIC 10 +#define SDMA_OP_CONST_FILL 11 +#define SDMA_OP_PTEPDE 12 +#define SDMA_OP_TIMESTAMP 13 +#define SDMA_OP_SRBM_WRITE 14 +#define SDMA_OP_PRE_EXE 15 +#define SDMA_OP_GPUVM_INV 16 +#define SDMA_OP_GCR_REQ 17 +#define SDMA_OP_DUMMY_TRAP 32 +#define SDMA_SUBOP_TIMESTAMP_SET 0 +#define SDMA_SUBOP_TIMESTAMP_GET 1 +#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2 +#define SDMA_SUBOP_COPY_LINEAR 0 +#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4 +#define SDMA_SUBOP_COPY_TILED 1 +#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5 +#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6 +#define SDMA_SUBOP_COPY_SOA 3 +#define SDMA_SUBOP_COPY_DIRTY_PAGE 7 +#define SDMA_SUBOP_COPY_LINEAR_PHY 8 +#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_LARGE 36 +#define SDMA_SUBOP_COPY_LINEAR_BC 16 +#define SDMA_SUBOP_COPY_TILED_BC 17 +#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_BC 20 +#define SDMA_SUBOP_COPY_TILED_SUB_WIND_BC 21 +#define SDMA_SUBOP_COPY_T2T_SUB_WIND_BC 22 +#define SDMA_SUBOP_WRITE_LINEAR 0 +#define SDMA_SUBOP_WRITE_TILED 1 +#define SDMA_SUBOP_WRITE_TILED_BC 17 +#define SDMA_SUBOP_PTEPDE_GEN 0 +#define SDMA_SUBOP_PTEPDE_COPY 1 +#define SDMA_SUBOP_PTEPDE_RMW 2 +#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS 3 +#define SDMA_SUBOP_MEM_INCR 1 +#define SDMA_SUBOP_DATA_FILL_MULTI 1 +#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1 +#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2 +#define SDMA_SUBOP_POLL_MEM_VERIFY 3 +#define SDMA_SUBOP_VM_INVALIDATION 4 +#define HEADER_AGENT_DISPATCH 4 +#define HEADER_BARRIER 5 +#define SDMA_OP_AQL_COPY 0 +#define SDMA_OP_AQL_BARRIER_OR 0 + +#define SDMA_GCR_RANGE_IS_PA (1 << 18) +#define SDMA_GCR_SEQ(x) (((x) & 0x3) << 16) +#define SDMA_GCR_GL2_WB (1 << 15) +#define SDMA_GCR_GL2_INV (1 << 14) +#define SDMA_GCR_GL2_DISCARD (1 << 13) +#define SDMA_GCR_GL2_RANGE(x) (((x) & 0x3) << 11) +#define SDMA_GCR_GL2_US (1 << 10) +#define SDMA_GCR_GL1_INV (1 << 9) +#define SDMA_GCR_GLV_INV (1 << 8) +#define SDMA_GCR_GLK_INV (1 << 7) +#define SDMA_GCR_GLK_WB (1 << 6) +#define SDMA_GCR_GLM_INV (1 << 5) +#define SDMA_GCR_GLM_WB (1 << 4) +#define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2) +#define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0) + +#define SDMA_DCC_DATA_FORMAT(x) ((x) & 0x3f) +#define SDMA_DCC_NUM_TYPE(x) (((x) & 0x7) << 9) +#define SDMA_DCC_READ_CM(x) (((x) & 0x3) << 16) +#define SDMA_DCC_WRITE_CM(x) (((x) & 0x3) << 18) +#define SDMA_DCC_MAX_COM(x) (((x) & 0x3) << 24) +#define SDMA_DCC_MAX_UCOM(x) (((x) & 0x1) << 26) + +/* +** Definitions for SDMA_PKT_COPY_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift) + +/*define for npd field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_npd_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_npd_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_npd_shift 28 +#define SDMA_PKT_COPY_LINEAR_HEADER_NPD(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_npd_mask) << SDMA_PKT_COPY_LINEAR_HEADER_npd_shift) + +/*define for backwards field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift 25 +#define SDMA_PKT_COPY_LINEAR_HEADER_BACKWARDS(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask) << SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x3FFFFFFF +#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift) + +/*define for dst_cache_policy field*/ +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_offset 2 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift 18 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift) + +/*define for src_cache_policy field*/ +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_offset 2 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift 26 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_offset 1 +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift) + +/*define for dst_ha field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift 19 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift) + +/*define for src_ha field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift 27 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_DIRTY_PAGE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift) + +/*define for cpv field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_shift 19 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_shift) + +/*define for all field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift 31 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_ALL(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_offset 1 +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask) << SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_mtype field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift 3 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift) + +/*define for dst_l2_policy field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift 6 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift) + +/*define for dst_llc field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_shift 8 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_LLC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_shift) + +/*define for src_mtype field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift 11 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift) + +/*define for src_l2_policy field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift 14 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift) + +/*define for src_llc field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_shift 16 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_LLC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift 17 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift) + +/*define for dst_gcc field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift 19 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift) + +/*define for dst_sys field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift 20 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift) + +/*define for dst_snoop field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift 22 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift) + +/*define for dst_gpa field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift 23 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift) + +/*define for src_sys field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift 28 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift) + +/*define for src_snoop field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift 30 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift) + +/*define for src_gpa field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift 31 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_PHYSICAL_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift) + +/*define for cpv field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_shift 19 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift) + +/*define for addr_pair_num field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_offset 1 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_mask 0x000000FF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_shift 24 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_ADDR_PAIR_NUM(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_shift) + +/*define for PARAMETER word*/ +/*define for dst_mtype field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift 3 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift) + +/*define for dst_l2_policy field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift 6 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift) + +/*define for dst_llc field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_shift 8 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LLC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_shift) + +/*define for src_mtype field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift 11 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift) + +/*define for src_l2_policy field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift 14 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift) + +/*define for src_llc field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_shift 16 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_LLC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift 17 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift) + +/*define for dst_gcc field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift 19 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift) + +/*define for dst_sys field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift 20 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift) + +/*define for dst_log field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift 21 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LOG(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift) + +/*define for dst_snoop field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift 22 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift) + +/*define for dst_gpa field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift 23 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift) + +/*define for src_gcc field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift 27 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift) + +/*define for src_sys field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift 28 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift) + +/*define for src_snoop field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift 30 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift) + +/*define for src_gpa field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift 31 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift) + +/*define for cpv field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_shift 19 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x3FFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst2_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift) + +/*define for dst2_cache_policy field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_shift 10 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_shift) + +/*define for dst1_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift) + +/*define for dst1_cache_policy field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_shift 18 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift) + +/*define for src_cache_policy field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_shift 26 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST1_ADDR_LO word*/ +/*define for dst1_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift) + +/*define for DST1_ADDR_HI word*/ +/*define for dst1_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift) + +/*define for DST2_ADDR_LO word*/ +/*define for dst2_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift) + +/*define for DST2_ADDR_HI word*/ +/*define for dst2_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift) + +/*define for cpv field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_shift 19 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_shift) + +/*define for elementsize field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x00001FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift) + +/*define for src_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift) + +/*define for DW_5 word*/ +/*define for src_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_8 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift) + +/*define for DW_9 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x00001FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift) + +/*define for dst_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift) + +/*define for DW_10 word*/ +/*define for dst_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift) + +/*define for DW_11 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift) + +/*define for DW_12 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x00001FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift) + +/*define for dst_cache_policy field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_shift 18 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift) + +/*define for src_cache_policy field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_shift 26 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_shift) + +/*define for cpv field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_shift 19 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_shift) + +/*define for DW_4 word*/ +/*define for src_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_shift) + +/*define for DW_5 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_offset 5 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_shift) + +/*define for DW_6 word*/ +/*define for src_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_offset 6 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_shift) + +/*define for DW_7 word*/ +/*define for src_slice_pitch_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_offset 7 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_SRC_SLICE_PITCH_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_shift) + +/*define for DW_8 word*/ +/*define for src_slice_pitch_47_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_mask 0x0000FFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_SRC_SLICE_PITCH_47_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_offset 10 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_11 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_shift) + +/*define for DW_12 word*/ +/*define for dst_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_shift) + +/*define for DW_13 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_offset 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_shift) + +/*define for DW_14 word*/ +/*define for dst_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_offset 14 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_shift) + +/*define for DW_15 word*/ +/*define for dst_slice_pitch_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_offset 15 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_DST_SLICE_PITCH_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_shift) + +/*define for DW_16 word*/ +/*define for dst_slice_pitch_47_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_offset 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_mask 0x0000FFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_SLICE_PITCH_47_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_offset 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_shift) + +/*define for dst_policy field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_offset 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_shift 18 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_offset 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_shift) + +/*define for src_policy field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_offset 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_shift 26 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_SRC_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_shift) + +/*define for DW_17 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_offset 17 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_shift) + +/*define for DW_18 word*/ +/*define for rect_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_offset 18 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_shift) + +/*define for DW_19 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_offset 19 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift) + +/*define for elementsize field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift 29 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift) + +/*define for src_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift) + +/*define for DW_5 word*/ +/*define for src_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_offset 5 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_offset 6 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_offset 7 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_8 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift) + +/*define for DW_9 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift) + +/*define for dst_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift) + +/*define for DW_10 word*/ +/*define for dst_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_offset 10 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift) + +/*define for DW_11 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift) + +/*define for DW_12 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift) + +/*define for dst_ha field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift 19 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift) + +/*define for src_ha field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift 27 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_encrypt_mask) << SDMA_PKT_COPY_TILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_TILED_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_HEADER_tmz_shift) + +/*define for cpv field*/ +#define SDMA_PKT_COPY_TILED_HEADER_cpv_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_cpv_shift 19 +#define SDMA_PKT_COPY_TILED_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_cpv_mask) << SDMA_PKT_COPY_TILED_HEADER_cpv_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_DW_3_width_offset 3 +#define SDMA_PKT_COPY_TILED_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_3_width_shift 0 +#define SDMA_PKT_COPY_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_width_mask) << SDMA_PKT_COPY_TILED_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_DW_4_height_offset 4 +#define SDMA_PKT_COPY_TILED_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_4_height_shift 0 +#define SDMA_PKT_COPY_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_height_mask) << SDMA_PKT_COPY_TILED_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_DW_4_depth_offset 4 +#define SDMA_PKT_COPY_TILED_DW_4_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_DW_4_depth_shift 16 +#define SDMA_PKT_COPY_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_TILED_DW_5_dimension_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_5_dimension_shift 9 +#define SDMA_PKT_COPY_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_dimension_mask) << SDMA_PKT_COPY_TILED_DW_5_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_TILED_DW_5_mip_max_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_DW_5_mip_max_shift 16 +#define SDMA_PKT_COPY_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mip_max_mask) << SDMA_PKT_COPY_TILED_DW_5_mip_max_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6 +#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0 +#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6 +#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16 +#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0 +#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift) + +/*define for linear_cache_policy field*/ +#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_shift 18 +#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift) + +/*define for tile_cache_policy field*/ +#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_shift 26 +#define SDMA_PKT_COPY_TILED_DW_7_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for LINEAR_PITCH word*/ +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift) + +/*define for LINEAR_SLICE_PITCH word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_TILED_COUNT_count_offset 12 +#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x3FFFFFFF +#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0 +#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_3_width_offset 3 +#define SDMA_PKT_COPY_TILED_BC_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_3_width_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_3_width_mask) << SDMA_PKT_COPY_TILED_BC_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_4_height_offset 4 +#define SDMA_PKT_COPY_TILED_BC_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_4_height_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_height_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_offset 4 +#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift 16 +#define SDMA_PKT_COPY_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift) + +/*define for array_mode field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift 3 +#define SDMA_PKT_COPY_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift) + +/*define for mit_mode field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift 8 +#define SDMA_PKT_COPY_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift) + +/*define for tilesplit_size field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift) + +/*define for bank_w field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift 15 +#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift) + +/*define for bank_h field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift 18 +#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift) + +/*define for num_bank field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift 21 +#define SDMA_PKT_COPY_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift) + +/*define for mat_aspt field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift 24 +#define SDMA_PKT_COPY_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift) + +/*define for pipe_config field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift 26 +#define SDMA_PKT_COPY_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_6_x_offset 6 +#define SDMA_PKT_COPY_TILED_BC_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_6_x_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_x_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_6_y_offset 6 +#define SDMA_PKT_COPY_TILED_BC_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_6_y_shift 16 +#define SDMA_PKT_COPY_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_y_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_7_z_offset 7 +#define SDMA_PKT_COPY_TILED_BC_DW_7_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_BC_DW_7_z_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_z_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_offset 7 +#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_BC_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_offset 7 +#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_BC_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 8 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 9 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for LINEAR_PITCH word*/ +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift) + +/*define for LINEAR_SLICE_PITCH word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_TILED_BC_COUNT_count_offset 12 +#define SDMA_PKT_COPY_TILED_BC_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_COPY_TILED_BC_COUNT_count_shift 2 +#define SDMA_PKT_COPY_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_BC_COUNT_count_mask) << SDMA_PKT_COPY_TILED_BC_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift) + +/*define for cpv field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_shift 19 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_shift) + +/*define for videocopy field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift) + +/*define for TILED_ADDR_LO_0 word*/ +/*define for tiled_addr0_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift) + +/*define for TILED_ADDR_HI_0 word*/ +/*define for tiled_addr0_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift) + +/*define for TILED_ADDR_LO_1 word*/ +/*define for tiled_addr1_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift) + +/*define for TILED_ADDR_HI_1 word*/ +/*define for tiled_addr1_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift) + +/*define for DW_5 word*/ +/*define for width field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_offset 5 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_WIDTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift) + +/*define for DW_6 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_offset 6 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_offset 6 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_DEPTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift) + +/*define for DW_7 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift 9 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_DIMENSION(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MIP_MAX(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift) + +/*define for DW_8 word*/ +/*define for x field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift) + +/*define for y field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift) + +/*define for DW_9 word*/ +/*define for z field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x00001FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift) + +/*define for DW_10 word*/ +/*define for dst2_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift) + +/*define for dst2_cache_policy field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_shift 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift) + +/*define for linear_cache_policy field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_shift 18 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift) + +/*define for tile_cache_policy field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_shift 26 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for LINEAR_PITCH word*/ +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift) + +/*define for LINEAR_SLICE_PITCH word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 14 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 15 +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x3FFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_COPY_T2T packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0 +#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_T2T_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_T2T_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_tmz_mask) << SDMA_PKT_COPY_T2T_HEADER_tmz_shift) + +/*define for dcc field*/ +#define SDMA_PKT_COPY_T2T_HEADER_dcc_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_shift 19 +#define SDMA_PKT_COPY_T2T_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_shift) + +/*define for cpv field*/ +#define SDMA_PKT_COPY_T2T_HEADER_cpv_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_HEADER_cpv_shift 28 +#define SDMA_PKT_COPY_T2T_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_cpv_mask) << SDMA_PKT_COPY_T2T_HEADER_cpv_shift) + +/*define for dcc_dir field*/ +#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift 31 +#define SDMA_PKT_COPY_T2T_HEADER_DCC_DIR(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift) + +/*define for src_width field*/ +#define SDMA_PKT_COPY_T2T_DW_4_src_width_offset 4 +#define SDMA_PKT_COPY_T2T_DW_4_src_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_4_src_width_shift 16 +#define SDMA_PKT_COPY_T2T_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_DW_4_src_width_shift) + +/*define for DW_5 word*/ +/*define for src_height field*/ +#define SDMA_PKT_COPY_T2T_DW_5_src_height_offset 5 +#define SDMA_PKT_COPY_T2T_DW_5_src_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_5_src_height_shift 0 +#define SDMA_PKT_COPY_T2T_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_DW_5_src_height_shift) + +/*define for src_depth field*/ +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_offset 5 +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_shift 16 +#define SDMA_PKT_COPY_T2T_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_DW_5_src_depth_shift) + +/*define for DW_6 word*/ +/*define for src_element_size field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift) + +/*define for src_swizzle_mode field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift) + +/*define for src_dimension field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift 9 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask) << SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift) + +/*define for src_mip_max field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift 16 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift) + +/*define for src_mip_id field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift 20 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7 +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8 +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9 +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9 +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift) + +/*define for DW_10 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10 +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift) + +/*define for dst_width field*/ +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_offset 10 +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_shift 16 +#define SDMA_PKT_COPY_T2T_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_width_shift) + +/*define for DW_11 word*/ +/*define for dst_height field*/ +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_offset 11 +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_shift 0 +#define SDMA_PKT_COPY_T2T_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_height_shift) + +/*define for dst_depth field*/ +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_offset 11 +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift 16 +#define SDMA_PKT_COPY_T2T_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift) + +/*define for DW_12 word*/ +/*define for dst_element_size field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift) + +/*define for dst_swizzle_mode field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_T2T_DW_12_DST_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift) + +/*define for dst_dimension field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift 9 +#define SDMA_PKT_COPY_T2T_DW_12_DST_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift) + +/*define for dst_mip_max field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift 16 +#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift) + +/*define for dst_mip_id field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift 20 +#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift) + +/*define for DW_13 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13 +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13 +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift) + +/*define for DW_14 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16 +#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift) + +/*define for dst_cache_policy field*/ +#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_shift 18 +#define SDMA_PKT_COPY_T2T_DW_14_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24 +#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift) + +/*define for src_cache_policy field*/ +#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_shift 26 +#define SDMA_PKT_COPY_T2T_DW_14_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_mask) << SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_shift) + +/*define for META_ADDR_LO word*/ +/*define for meta_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_offset 15 +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift) + +/*define for META_ADDR_HI word*/ +/*define for meta_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_offset 16 +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift) + +/*define for META_CONFIG word*/ +/*define for data_format field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask 0x0000007F +#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift 0 +#define SDMA_PKT_COPY_T2T_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift) + +/*define for color_transform_disable field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift 7 +#define SDMA_PKT_COPY_T2T_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift) + +/*define for alpha_is_on_msb field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift 8 +#define SDMA_PKT_COPY_T2T_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift) + +/*define for number_type field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift 9 +#define SDMA_PKT_COPY_T2T_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift) + +/*define for surface_type field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift 12 +#define SDMA_PKT_COPY_T2T_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift) + +/*define for meta_llc field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_shift 14 +#define SDMA_PKT_COPY_T2T_META_CONFIG_META_LLC(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_shift) + +/*define for max_comp_block_size field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift 24 +#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift) + +/*define for max_uncomp_block_size field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift 26 +#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift) + +/*define for write_compress_enable field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift 28 +#define SDMA_PKT_COPY_T2T_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift) + +/*define for meta_tmz field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift 29 +#define SDMA_PKT_COPY_T2T_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift) + +/*define for pipe_aligned field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_shift 31 +#define SDMA_PKT_COPY_T2T_META_CONFIG_PIPE_ALIGNED(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_shift) + + +/* +** Definitions for SDMA_PKT_COPY_T2T_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_T2T_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_T2T_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_T2T_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_T2T_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift) + +/*define for src_width field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_offset 4 +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift) + +/*define for DW_5 word*/ +/*define for src_height field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_offset 5 +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift) + +/*define for src_depth field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_offset 5 +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift) + +/*define for DW_6 word*/ +/*define for src_element_size field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift) + +/*define for src_array_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift 3 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift) + +/*define for src_mit_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift 8 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift) + +/*define for src_tilesplit_size field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift) + +/*define for src_bank_w field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift 15 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift) + +/*define for src_bank_h field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift 18 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift) + +/*define for src_num_bank field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift 21 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift) + +/*define for src_mat_aspt field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift 24 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift) + +/*define for src_pipe_config field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift 26 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_offset 7 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_offset 8 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_offset 9 +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_offset 9 +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift) + +/*define for DW_10 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_offset 10 +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift) + +/*define for dst_width field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_offset 10 +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift) + +/*define for DW_11 word*/ +/*define for dst_height field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_offset 11 +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift) + +/*define for dst_depth field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_offset 11 +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask 0x00000FFF +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift) + +/*define for DW_12 word*/ +/*define for dst_element_size field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift) + +/*define for dst_array_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift 3 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift) + +/*define for dst_mit_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift 8 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift) + +/*define for dst_tilesplit_size field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift) + +/*define for dst_bank_w field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift 15 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift) + +/*define for dst_bank_h field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift 18 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift) + +/*define for dst_num_bank field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift 21 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift) + +/*define for dst_mat_aspt field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift 24 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift) + +/*define for dst_pipe_config field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift 26 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift) + +/*define for DW_13 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_offset 13 +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_offset 13 +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift) + +/*define for DW_14 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_offset 14 +#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_offset 14 +#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_offset 14 +#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift 24 +#define SDMA_PKT_COPY_T2T_BC_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift) + +/*define for dcc field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift 19 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift) + +/*define for cpv field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_shift 28 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for tiled_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift) + +/*define for tiled_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift) + +/*define for DW_4 word*/ +/*define for tiled_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift) + +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift) + +/*define for DW_5 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift) + +/*define for DW_6 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift) + +/*define for mip_id field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift 20 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_ID(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for linear_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift) + +/*define for linear_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift) + +/*define for DW_10 word*/ +/*define for linear_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift) + +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift) + +/*define for DW_11 word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift) + +/*define for DW_12 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift) + +/*define for DW_13 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift) + +/*define for linear_cache_policy field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_shift 18 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift) + +/*define for tile_cache_policy field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_shift 26 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_shift) + +/*define for META_ADDR_LO word*/ +/*define for meta_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_offset 14 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift) + +/*define for META_ADDR_HI word*/ +/*define for meta_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_offset 15 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift) + +/*define for META_CONFIG word*/ +/*define for data_format field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask 0x0000007F +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift) + +/*define for color_transform_disable field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift 7 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift) + +/*define for alpha_is_on_msb field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift) + +/*define for number_type field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift) + +/*define for surface_type field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift) + +/*define for meta_llc field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_shift 14 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_LLC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_shift) + +/*define for max_comp_block_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift) + +/*define for max_uncomp_block_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift 26 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift) + +/*define for write_compress_enable field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift 28 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift) + +/*define for meta_tmz field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift 29 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift) + +/*define for pipe_aligned field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_shift 31 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_PIPE_ALIGNED(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED_SUBWIN_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for tiled_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift) + +/*define for tiled_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift) + +/*define for DW_4 word*/ +/*define for tiled_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift) + +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift) + +/*define for DW_5 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift) + +/*define for DW_6 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift) + +/*define for array_mode field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift) + +/*define for mit_mode field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift) + +/*define for tilesplit_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift) + +/*define for bank_w field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift 15 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift) + +/*define for bank_h field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift 18 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift) + +/*define for num_bank field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift 21 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift) + +/*define for mat_aspt field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift) + +/*define for pipe_config field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift 26 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 7 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for linear_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift) + +/*define for linear_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift) + +/*define for DW_10 word*/ +/*define for linear_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift) + +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift) + +/*define for DW_11 word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift) + +/*define for DW_12 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift) + +/*define for DW_13 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift) + + +/* +** Definitions for SDMA_PKT_COPY_STRUCT packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_STRUCT_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask) << SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift) + +/*define for cpv field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_shift 28 +#define SDMA_PKT_COPY_STRUCT_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_cpv_mask) << SDMA_PKT_COPY_STRUCT_HEADER_cpv_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift) + +/*define for SB_ADDR_LO word*/ +/*define for sb_addr_31_0 field*/ +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift) + +/*define for SB_ADDR_HI word*/ +/*define for sb_addr_63_32 field*/ +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift) + +/*define for START_INDEX word*/ +/*define for start_index field*/ +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3 +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0 +#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4 +#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0 +#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift) + +/*define for DW_5 word*/ +/*define for stride field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0 +#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 16 +#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift) + +/*define for linear_cache_policy field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_shift 18 +#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_shift) + +/*define for struct_sw field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003 +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 24 +#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift) + +/*define for struct_cache_policy field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_mask 0x00000007 +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_shift 26 +#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_UNTILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_WRITE_UNTILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift 18 +#define SDMA_PKT_WRITE_UNTILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift) + +/*define for cpv field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_shift 28 +#define SDMA_PKT_WRITE_UNTILED_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_cpv_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_cpv_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3 +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x000FFFFF +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift) + +/*define for sw field*/ +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3 +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003 +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24 +#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift) + +/*define for cache_policy field*/ +#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_offset 3 +#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_mask 0x00000007 +#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_shift 26 +#define SDMA_PKT_WRITE_UNTILED_DW_3_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4 +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_TILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_WRITE_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_shift 18 +#define SDMA_PKT_WRITE_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_TILED_HEADER_tmz_shift) + +/*define for cpv field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_cpv_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_WRITE_TILED_HEADER_cpv_shift 28 +#define SDMA_PKT_WRITE_TILED_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_cpv_mask) << SDMA_PKT_WRITE_TILED_HEADER_cpv_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_WRITE_TILED_DW_3_width_offset 3 +#define SDMA_PKT_WRITE_TILED_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_3_width_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_WRITE_TILED_DW_4_height_offset 4 +#define SDMA_PKT_WRITE_TILED_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_4_height_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_WRITE_TILED_DW_4_depth_offset 4 +#define SDMA_PKT_WRITE_TILED_DW_4_depth_mask 0x00001FFF +#define SDMA_PKT_WRITE_TILED_DW_4_depth_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift 3 +#define SDMA_PKT_WRITE_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_shift 9 +#define SDMA_PKT_WRITE_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_dimension_mask) << SDMA_PKT_WRITE_TILED_DW_5_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask 0x0000000F +#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask) << SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6 +#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6 +#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7 +#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x00001FFF +#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift) + +/*define for sw field*/ +#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7 +#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24 +#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift) + +/*define for cache_policy field*/ +#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_offset 7 +#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_shift 26 +#define SDMA_PKT_WRITE_TILED_DW_7_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_cache_policy_mask) << SDMA_PKT_WRITE_TILED_DW_7_cache_policy_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8 +#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0 +#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9 +#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0 +#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_TILED_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_offset 3 +#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_offset 4 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_offset 4 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask 0x000007FF +#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift 16 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift) + +/*define for array_mode field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask 0x0000000F +#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift 3 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift) + +/*define for mit_mode field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift 8 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift) + +/*define for tilesplit_size field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift 11 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift) + +/*define for bank_w field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift 15 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift) + +/*define for bank_h field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift 18 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift) + +/*define for num_bank field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift 21 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift) + +/*define for mat_aspt field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift 24 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift) + +/*define for pipe_config field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask 0x0000001F +#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift 26 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_offset 6 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_offset 6 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift 16 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_offset 7 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask 0x000007FF +#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift) + +/*define for sw field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_offset 7 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift 24 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_offset 8 +#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift 2 +#define SDMA_PKT_WRITE_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_offset 9 +#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_COPY packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_COPY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift) + +/*define for mtype field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_mtype_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_mtype_mask 0x00000003 +#define SDMA_PKT_PTEPDE_COPY_HEADER_mtype_shift 16 +#define SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_mtype_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_mtype_shift) + +/*define for snoop field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_snoop_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_snoop_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_HEADER_snoop_shift 22 +#define SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_snoop_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_snoop_shift) + +/*define for scope field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_scope_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_scope_mask 0x00000003 +#define SDMA_PKT_PTEPDE_COPY_HEADER_scope_shift 24 +#define SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_scope_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_scope_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_DW0 word*/ +/*define for mask_dw0 field*/ +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_offset 5 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift) + +/*define for MASK_DW1 word*/ +/*define for mask_dw1 field*/ +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_offset 6 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift 0 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_PTEPDE_COPY_COUNT_count_offset 7 +#define SDMA_PKT_PTEPDE_COPY_COUNT_count_mask 0x0007FFFF +#define SDMA_PKT_PTEPDE_COPY_COUNT_count_shift 0 +#define SDMA_PKT_PTEPDE_COPY_COUNT_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_count_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_count_shift) + +/*define for dst_cache_policy field*/ +#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_offset 7 +#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_mask 0x00000007 +#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_shift 22 +#define SDMA_PKT_PTEPDE_COPY_COUNT_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_shift) + +/*define for src_cache_policy field*/ +#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_offset 7 +#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_mask 0x00000007 +#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_shift 29 +#define SDMA_PKT_PTEPDE_COPY_COUNT_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_COPY_BACKWARDS packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift) + +/*define for pte_size field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask 0x00000003 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift 28 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift) + +/*define for direction field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift 30 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift) + +/*define for ptepde_op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift 31 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_BIT_FOR_DW word*/ +/*define for mask_first_xfer field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_offset 5 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_FIRST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift) + +/*define for mask_last_xfer field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_offset 5 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift 8 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_LAST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift) + +/*define for COUNT_IN_32B_XFER word*/ +/*define for count field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_offset 6 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask 0x0001FFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_RMW packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift) + +/*define for mtype field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask 0x00000007 +#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift 16 +#define SDMA_PKT_PTEPDE_RMW_HEADER_MTYPE(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift) + +/*define for gcc field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift 19 +#define SDMA_PKT_PTEPDE_RMW_HEADER_GCC(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift) + +/*define for sys field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift 20 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SYS(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift) + +/*define for snp field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift 22 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SNP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift) + +/*define for gpa field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift 23 +#define SDMA_PKT_PTEPDE_RMW_HEADER_GPA(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift) + +/*define for l2_policy field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask 0x00000003 +#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift 24 +#define SDMA_PKT_PTEPDE_RMW_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift) + +/*define for llc_policy field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_shift 26 +#define SDMA_PKT_PTEPDE_RMW_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_shift 28 +#define SDMA_PKT_PTEPDE_RMW_HEADER_CPV(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_cpv_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_cpv_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift) + +/*define for MASK_LO word*/ +/*define for mask_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_offset 3 +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_MASK_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask) << SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift) + +/*define for MASK_HI word*/ +/*define for mask_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_offset 4 +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_MASK_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask) << SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift) + +/*define for VALUE_LO word*/ +/*define for value_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_offset 5 +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_VALUE_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift) + +/*define for VALUE_HI word*/ +/*define for value_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_offset 6 +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_VALUE_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift) + +/*define for COUNT word*/ +/*define for num_of_pte field*/ +#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_offset 7 +#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_shift 0 +#define SDMA_PKT_PTEPDE_RMW_COUNT_NUM_OF_PTE(x) (((x) & SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_mask) << SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_shift) + + +/* +** Definitions for SDMA_PKT_REGISTER_RMW packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_REGISTER_RMW_HEADER_op_offset 0 +#define SDMA_PKT_REGISTER_RMW_HEADER_op_mask 0x000000FF +#define SDMA_PKT_REGISTER_RMW_HEADER_op_shift 0 +#define SDMA_PKT_REGISTER_RMW_HEADER_OP(x) (((x) & SDMA_PKT_REGISTER_RMW_HEADER_op_mask) << SDMA_PKT_REGISTER_RMW_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_offset 0 +#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_shift 8 +#define SDMA_PKT_REGISTER_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_REGISTER_RMW_HEADER_sub_op_mask) << SDMA_PKT_REGISTER_RMW_HEADER_sub_op_shift) + +/*define for ADDR word*/ +/*define for addr field*/ +#define SDMA_PKT_REGISTER_RMW_ADDR_addr_offset 1 +#define SDMA_PKT_REGISTER_RMW_ADDR_addr_mask 0x000FFFFF +#define SDMA_PKT_REGISTER_RMW_ADDR_addr_shift 0 +#define SDMA_PKT_REGISTER_RMW_ADDR_ADDR(x) (((x) & SDMA_PKT_REGISTER_RMW_ADDR_addr_mask) << SDMA_PKT_REGISTER_RMW_ADDR_addr_shift) + +/*define for aperture_id field*/ +#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_offset 1 +#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_mask 0x00000FFF +#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_shift 20 +#define SDMA_PKT_REGISTER_RMW_ADDR_APERTURE_ID(x) (((x) & SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_mask) << SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_shift) + +/*define for MASK word*/ +/*define for mask field*/ +#define SDMA_PKT_REGISTER_RMW_MASK_mask_offset 2 +#define SDMA_PKT_REGISTER_RMW_MASK_mask_mask 0xFFFFFFFF +#define SDMA_PKT_REGISTER_RMW_MASK_mask_shift 0 +#define SDMA_PKT_REGISTER_RMW_MASK_MASK(x) (((x) & SDMA_PKT_REGISTER_RMW_MASK_mask_mask) << SDMA_PKT_REGISTER_RMW_MASK_mask_shift) + +/*define for VALUE word*/ +/*define for value field*/ +#define SDMA_PKT_REGISTER_RMW_VALUE_value_offset 3 +#define SDMA_PKT_REGISTER_RMW_VALUE_value_mask 0xFFFFFFFF +#define SDMA_PKT_REGISTER_RMW_VALUE_value_shift 0 +#define SDMA_PKT_REGISTER_RMW_VALUE_VALUE(x) (((x) & SDMA_PKT_REGISTER_RMW_VALUE_value_mask) << SDMA_PKT_REGISTER_RMW_VALUE_value_shift) + +/*define for MISC word*/ +/*define for stride field*/ +#define SDMA_PKT_REGISTER_RMW_MISC_stride_offset 4 +#define SDMA_PKT_REGISTER_RMW_MISC_stride_mask 0x000FFFFF +#define SDMA_PKT_REGISTER_RMW_MISC_stride_shift 0 +#define SDMA_PKT_REGISTER_RMW_MISC_STRIDE(x) (((x) & SDMA_PKT_REGISTER_RMW_MISC_stride_mask) << SDMA_PKT_REGISTER_RMW_MISC_stride_shift) + +/*define for num_of_reg field*/ +#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_offset 4 +#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_mask 0x00000FFF +#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_shift 20 +#define SDMA_PKT_REGISTER_RMW_MISC_NUM_OF_REG(x) (((x) & SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_mask) << SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_INCR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift) + +/*define for cache_policy field*/ +#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_offset 0 +#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_mask 0x00000007 +#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_shift 24 +#define SDMA_PKT_WRITE_INCR_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_cache_policy_mask) << SDMA_PKT_WRITE_INCR_HEADER_cache_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_WRITE_INCR_HEADER_cpv_offset 0 +#define SDMA_PKT_WRITE_INCR_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_WRITE_INCR_HEADER_cpv_shift 28 +#define SDMA_PKT_WRITE_INCR_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_cpv_mask) << SDMA_PKT_WRITE_INCR_HEADER_cpv_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_DW0 word*/ +/*define for mask_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3 +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift) + +/*define for MASK_DW1 word*/ +/*define for mask_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4 +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift) + +/*define for INIT_DW0 word*/ +/*define for init_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5 +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift) + +/*define for INIT_DW1 word*/ +/*define for init_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6 +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift) + +/*define for INCR_DW0 word*/ +/*define for incr_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7 +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift) + +/*define for INCR_DW1 word*/ +/*define for incr_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8 +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9 +#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF +#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0 +#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_INDIRECT packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_INDIRECT_HEADER_op_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF +#define SDMA_PKT_INDIRECT_HEADER_op_shift 0 +#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8 +#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift) + +/*define for vmid field*/ +#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F +#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16 +#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift) + +/*define for priv field*/ +#define SDMA_PKT_INDIRECT_HEADER_priv_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_priv_mask 0x00000001 +#define SDMA_PKT_INDIRECT_HEADER_priv_shift 31 +#define SDMA_PKT_INDIRECT_HEADER_PRIV(x) (((x) & SDMA_PKT_INDIRECT_HEADER_priv_mask) << SDMA_PKT_INDIRECT_HEADER_priv_shift) + +/*define for BASE_LO word*/ +/*define for ib_base_31_0 field*/ +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1 +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0 +#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift) + +/*define for BASE_HI word*/ +/*define for ib_base_63_32 field*/ +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2 +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0 +#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift) + +/*define for IB_SIZE word*/ +/*define for ib_size field*/ +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3 +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0 +#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift) + +/*define for CSA_ADDR_LO word*/ +/*define for csa_addr_31_0 field*/ +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4 +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0 +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift) + +/*define for CSA_ADDR_HI word*/ +/*define for csa_addr_63_32 field*/ +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5 +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0 +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_SEMAPHORE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0 +#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8 +#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift) + +/*define for write_one field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29 +#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift) + +/*define for signal field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30 +#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift) + +/*define for mailbox field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31 +#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_MEM_INCR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_MEM_INCR_HEADER_op_offset 0 +#define SDMA_PKT_MEM_INCR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_MEM_INCR_HEADER_op_shift 0 +#define SDMA_PKT_MEM_INCR_HEADER_OP(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_op_mask) << SDMA_PKT_MEM_INCR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_MEM_INCR_HEADER_sub_op_offset 0 +#define SDMA_PKT_MEM_INCR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_MEM_INCR_HEADER_sub_op_shift 8 +#define SDMA_PKT_MEM_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_sub_op_mask) << SDMA_PKT_MEM_INCR_HEADER_sub_op_shift) + +/*define for l2_policy field*/ +#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_offset 0 +#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_mask 0x00000003 +#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_shift 24 +#define SDMA_PKT_MEM_INCR_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_l2_policy_mask) << SDMA_PKT_MEM_INCR_HEADER_l2_policy_shift) + +/*define for llc_policy field*/ +#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_offset 0 +#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_mask 0x00000001 +#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_shift 26 +#define SDMA_PKT_MEM_INCR_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_llc_policy_mask) << SDMA_PKT_MEM_INCR_HEADER_llc_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_MEM_INCR_HEADER_cpv_offset 0 +#define SDMA_PKT_MEM_INCR_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_MEM_INCR_HEADER_cpv_shift 28 +#define SDMA_PKT_MEM_INCR_HEADER_CPV(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_cpv_mask) << SDMA_PKT_MEM_INCR_HEADER_cpv_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_MEM_INCR_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_mask) << SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_MEM_INCR_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_mask) << SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_VM_INVALIDATION packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8 +#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift) + +/*define for gfx_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift 16 +#define SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift) + +/*define for mm_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift 24 +#define SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift) + +/*define for INVALIDATEREQ word*/ +/*define for invalidatereq field*/ +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1 +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0 +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) (((x) & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift) + +/*define for ADDRESSRANGELO word*/ +/*define for addressrangelo field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift) + +/*define for ADDRESSRANGEHI word*/ +/*define for invalidateack field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift) + +/*define for addressrangehi field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift) + +/*define for reserved field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift) + + +/* +** Definitions for SDMA_PKT_FENCE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_FENCE_HEADER_op_offset 0 +#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_FENCE_HEADER_op_shift 0 +#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0 +#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8 +#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift) + +/*define for mtype field*/ +#define SDMA_PKT_FENCE_HEADER_mtype_offset 0 +#define SDMA_PKT_FENCE_HEADER_mtype_mask 0x00000007 +#define SDMA_PKT_FENCE_HEADER_mtype_shift 16 +#define SDMA_PKT_FENCE_HEADER_MTYPE(x) (((x) & SDMA_PKT_FENCE_HEADER_mtype_mask) << SDMA_PKT_FENCE_HEADER_mtype_shift) + +/*define for gcc field*/ +#define SDMA_PKT_FENCE_HEADER_gcc_offset 0 +#define SDMA_PKT_FENCE_HEADER_gcc_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_gcc_shift 19 +#define SDMA_PKT_FENCE_HEADER_GCC(x) (((x) & SDMA_PKT_FENCE_HEADER_gcc_mask) << SDMA_PKT_FENCE_HEADER_gcc_shift) + +/*define for sys field*/ +#define SDMA_PKT_FENCE_HEADER_sys_offset 0 +#define SDMA_PKT_FENCE_HEADER_sys_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_sys_shift 20 +#define SDMA_PKT_FENCE_HEADER_SYS(x) (((x) & SDMA_PKT_FENCE_HEADER_sys_mask) << SDMA_PKT_FENCE_HEADER_sys_shift) + +/*define for snp field*/ +#define SDMA_PKT_FENCE_HEADER_snp_offset 0 +#define SDMA_PKT_FENCE_HEADER_snp_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_snp_shift 22 +#define SDMA_PKT_FENCE_HEADER_SNP(x) (((x) & SDMA_PKT_FENCE_HEADER_snp_mask) << SDMA_PKT_FENCE_HEADER_snp_shift) + +/*define for gpa field*/ +#define SDMA_PKT_FENCE_HEADER_gpa_offset 0 +#define SDMA_PKT_FENCE_HEADER_gpa_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_gpa_shift 23 +#define SDMA_PKT_FENCE_HEADER_GPA(x) (((x) & SDMA_PKT_FENCE_HEADER_gpa_mask) << SDMA_PKT_FENCE_HEADER_gpa_shift) + +/*define for l2_policy field*/ +#define SDMA_PKT_FENCE_HEADER_l2_policy_offset 0 +#define SDMA_PKT_FENCE_HEADER_l2_policy_mask 0x00000003 +#define SDMA_PKT_FENCE_HEADER_l2_policy_shift 24 +#define SDMA_PKT_FENCE_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_l2_policy_mask) << SDMA_PKT_FENCE_HEADER_l2_policy_shift) + +/*define for llc_policy field*/ +#define SDMA_PKT_FENCE_HEADER_llc_policy_offset 0 +#define SDMA_PKT_FENCE_HEADER_llc_policy_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_llc_policy_shift 26 +#define SDMA_PKT_FENCE_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_llc_policy_mask) << SDMA_PKT_FENCE_HEADER_llc_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_FENCE_HEADER_cpv_offset 0 +#define SDMA_PKT_FENCE_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_cpv_shift 28 +#define SDMA_PKT_FENCE_HEADER_CPV(x) (((x) & SDMA_PKT_FENCE_HEADER_cpv_mask) << SDMA_PKT_FENCE_HEADER_cpv_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift) + +/*define for DATA word*/ +/*define for data field*/ +#define SDMA_PKT_FENCE_DATA_data_offset 3 +#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_DATA_data_shift 0 +#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift) + + +/* +** Definitions for SDMA_PKT_SRBM_WRITE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8 +#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift) + +/*define for byte_en field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28 +#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift) + +/*define for ADDR word*/ +/*define for addr field*/ +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1 +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0003FFFF +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0 +#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift) + +/*define for apertureid field*/ +#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_offset 1 +#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask 0x00000FFF +#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift 20 +#define SDMA_PKT_SRBM_WRITE_ADDR_APERTUREID(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask) << SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift) + +/*define for DATA word*/ +/*define for data field*/ +#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2 +#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF +#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0 +#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift) + + +/* +** Definitions for SDMA_PKT_PRE_EXE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0 +#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8 +#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift) + +/*define for dev_sel field*/ +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16 +#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift) + +/*define for EXEC_COUNT word*/ +/*define for exec_count field*/ +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1 +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0 +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift) + + +/* +** Definitions for SDMA_PKT_COND_EXE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COND_EXE_HEADER_op_offset 0 +#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COND_EXE_HEADER_op_shift 0 +#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0 +#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8 +#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift) + +/*define for cache_policy field*/ +#define SDMA_PKT_COND_EXE_HEADER_cache_policy_offset 0 +#define SDMA_PKT_COND_EXE_HEADER_cache_policy_mask 0x00000007 +#define SDMA_PKT_COND_EXE_HEADER_cache_policy_shift 24 +#define SDMA_PKT_COND_EXE_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_COND_EXE_HEADER_cache_policy_mask) << SDMA_PKT_COND_EXE_HEADER_cache_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_COND_EXE_HEADER_cpv_offset 0 +#define SDMA_PKT_COND_EXE_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_COND_EXE_HEADER_cpv_shift 28 +#define SDMA_PKT_COND_EXE_HEADER_CPV(x) (((x) & SDMA_PKT_COND_EXE_HEADER_cpv_mask) << SDMA_PKT_COND_EXE_HEADER_cpv_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift) + +/*define for REFERENCE word*/ +/*define for reference field*/ +#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3 +#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0 +#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift) + +/*define for EXEC_COUNT word*/ +/*define for exec_count field*/ +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4 +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0 +#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift) + + +/* +** Definitions for SDMA_PKT_CONSTANT_FILL packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8 +#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift) + +/*define for sw field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16 +#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift) + +/*define for cache_policy field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_mask 0x00000007 +#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_shift 24 +#define SDMA_PKT_CONSTANT_FILL_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_shift 28 +#define SDMA_PKT_CONSTANT_FILL_HEADER_CPV(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_cpv_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_cpv_shift) + +/*define for fillsize field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003 +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30 +#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DATA word*/ +/*define for src_data_31_0 field*/ +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3 +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4 +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x3FFFFFFF +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0 +#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_DATA_FILL_MULTI packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask 0x000000FF +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift 8 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift) + +/*define for cache_policy field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_mask 0x00000007 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_shift 24 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_shift 28 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_CPV(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_shift) + +/*define for memlog_clr field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask 0x00000001 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift 31 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_MEMLOG_CLR(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift) + +/*define for BYTE_STRIDE word*/ +/*define for byte_stride field*/ +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_offset 1 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_BYTE_STRIDE(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift) + +/*define for DMA_COUNT word*/ +/*define for dma_count field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_offset 2 +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_DMA_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask) << SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for BYTE_COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_offset 5 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask 0x03FFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_POLL_REGMEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift) + +/*define for cache_policy field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_mask 0x00000007 +#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_shift 20 +#define SDMA_PKT_POLL_REGMEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_shift 24 +#define SDMA_PKT_POLL_REGMEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_cpv_mask) << SDMA_PKT_POLL_REGMEM_HEADER_cpv_shift) + +/*define for hdp_flush field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001 +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26 +#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift) + +/*define for func field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007 +#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28 +#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift) + +/*define for mem_poll field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001 +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31 +#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift) + +/*define for VALUE word*/ +/*define for value field*/ +#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3 +#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0 +#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift) + +/*define for MASK word*/ +/*define for mask field*/ +#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4 +#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0 +#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift) + +/*define for DW5 word*/ +/*define for interval field*/ +#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5 +#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF +#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0 +#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift) + +/*define for retry_count field*/ +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5 +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16 +#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift) + + +/* +** Definitions for SDMA_PKT_POLL_REG_WRITE_MEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift) + +/*define for cache_policy field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_offset 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_mask 0x00000007 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_shift 24 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_offset 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_shift 28 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_shift) + +/*define for SRC_ADDR word*/ +/*define for addr_31_2 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_offset 1 +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask 0x3FFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift 2 +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_ADDR_31_2(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift) + +/*define for DST_ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 2 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 3 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_POLL_DBIT_WRITE_MEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift) + +/*define for ea field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask 0x00000003 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift 16 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_EA(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift) + +/*define for cache_policy field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_mask 0x00000007 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_shift 24 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_shift 28 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_shift) + +/*define for DST_ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift) + +/*define for START_PAGE word*/ +/*define for addr_31_4 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_offset 3 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask 0x0FFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift 4 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_ADDR_31_4(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift) + +/*define for PAGE_NUM word*/ +/*define for page_num_31_0 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_offset 4 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_PAGE_NUM_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift) + + +/* +** Definitions for SDMA_PKT_POLL_MEM_VERIFY packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift) + +/*define for cache_policy field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_mask 0x00000007 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_shift 24 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_shift 28 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_shift) + +/*define for mode field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask 0x00000001 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift 31 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_MODE(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift) + +/*define for PATTERN word*/ +/*define for pattern field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_offset 1 +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_PATTERN(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask) << SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift) + +/*define for CMP0_ADDR_START_LO word*/ +/*define for cmp0_start_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_offset 2 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_CMP0_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift) + +/*define for CMP0_ADDR_START_HI word*/ +/*define for cmp0_start_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_offset 3 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_CMP0_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift) + +/*define for CMP0_ADDR_END_LO word*/ +/*define for cmp0_end_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_offset 4 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_CMP0_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_shift) + +/*define for CMP0_ADDR_END_HI word*/ +/*define for cmp0_end_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_offset 5 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_CMP0_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_shift) + +/*define for CMP1_ADDR_START_LO word*/ +/*define for cmp1_start_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_offset 6 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_CMP1_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift) + +/*define for CMP1_ADDR_START_HI word*/ +/*define for cmp1_start_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_offset 7 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_CMP1_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift) + +/*define for CMP1_ADDR_END_LO word*/ +/*define for cmp1_end_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_offset 8 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift) + +/*define for CMP1_ADDR_END_HI word*/ +/*define for cmp1_end_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_offset 9 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift) + +/*define for REC_ADDR_LO word*/ +/*define for rec_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_offset 10 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_REC_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift) + +/*define for REC_ADDR_HI word*/ +/*define for rec_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_offset 11 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_REC_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift) + +/*define for RESERVED word*/ +/*define for reserved field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_offset 12 +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift) + + +/* +** Definitions for SDMA_PKT_ATOMIC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_ATOMIC_HEADER_op_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_ATOMIC_HEADER_op_shift 0 +#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift) + +/*define for loop field*/ +#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001 +#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16 +#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift) + +/*define for tmz field*/ +#define SDMA_PKT_ATOMIC_HEADER_tmz_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_ATOMIC_HEADER_tmz_shift 18 +#define SDMA_PKT_ATOMIC_HEADER_TMZ(x) (((x) & SDMA_PKT_ATOMIC_HEADER_tmz_mask) << SDMA_PKT_ATOMIC_HEADER_tmz_shift) + +/*define for cache_policy field*/ +#define SDMA_PKT_ATOMIC_HEADER_cache_policy_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_cache_policy_mask 0x00000007 +#define SDMA_PKT_ATOMIC_HEADER_cache_policy_shift 20 +#define SDMA_PKT_ATOMIC_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_ATOMIC_HEADER_cache_policy_mask) << SDMA_PKT_ATOMIC_HEADER_cache_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_ATOMIC_HEADER_cpv_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_ATOMIC_HEADER_cpv_shift 24 +#define SDMA_PKT_ATOMIC_HEADER_CPV(x) (((x) & SDMA_PKT_ATOMIC_HEADER_cpv_mask) << SDMA_PKT_ATOMIC_HEADER_cpv_shift) + +/*define for atomic_op field*/ +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25 +#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift) + +/*define for SRC_DATA_LO word*/ +/*define for src_data_31_0 field*/ +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3 +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0 +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift) + +/*define for SRC_DATA_HI word*/ +/*define for src_data_63_32 field*/ +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4 +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0 +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift) + +/*define for CMP_DATA_LO word*/ +/*define for cmp_data_31_0 field*/ +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5 +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0 +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift) + +/*define for CMP_DATA_HI word*/ +/*define for cmp_data_63_32 field*/ +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6 +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0 +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift) + +/*define for LOOP_INTERVAL word*/ +/*define for loop_interval field*/ +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7 +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0 +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_SET packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift) + +/*define for INIT_DATA_LO word*/ +/*define for init_data_31_0 field*/ +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift) + +/*define for INIT_DATA_HI word*/ +/*define for init_data_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_GET packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift) + +/*define for l2_policy field*/ +#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_mask 0x00000003 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_shift 24 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_shift) + +/*define for llc_policy field*/ +#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_mask 0x00000001 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_shift 26 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_shift 28 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_CPV(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_shift) + +/*define for WRITE_ADDR_LO word*/ +/*define for write_addr_31_3 field*/ +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift) + +/*define for WRITE_ADDR_HI word*/ +/*define for write_addr_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift) + +/*define for l2_policy field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_mask 0x00000003 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_shift 24 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_shift) + +/*define for llc_policy field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_mask 0x00000001 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_shift 26 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_shift) + +/*define for cpv field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_mask 0x00000001 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_shift 28 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_CPV(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_shift) + +/*define for WRITE_ADDR_LO word*/ +/*define for write_addr_31_3 field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift) + +/*define for WRITE_ADDR_HI word*/ +/*define for write_addr_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TRAP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TRAP_HEADER_op_offset 0 +#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TRAP_HEADER_op_shift 0 +#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0 +#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8 +#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift) + +/*define for INT_CONTEXT word*/ +/*define for int_context field*/ +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1 +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0 +#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift) + + +/* +** Definitions for SDMA_PKT_DUMMY_TRAP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_offset 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_shift 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_offset 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift 8 +#define SDMA_PKT_DUMMY_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift) + +/*define for INT_CONTEXT word*/ +/*define for int_context field*/ +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_offset 1 +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift 0 +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift) + + +/* +** Definitions for SDMA_PKT_GPUVM_INV packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_GPUVM_INV_HEADER_op_offset 0 +#define SDMA_PKT_GPUVM_INV_HEADER_op_mask 0x000000FF +#define SDMA_PKT_GPUVM_INV_HEADER_op_shift 0 +#define SDMA_PKT_GPUVM_INV_HEADER_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_offset 0 +#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift 8 +#define SDMA_PKT_GPUVM_INV_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift) + +/*define for PAYLOAD1 word*/ +/*define for per_vmid_inv_req field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask 0x0000FFFF +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift 0 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_PER_VMID_INV_REQ(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift) + +/*define for flush_type field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask 0x00000007 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift 16 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FLUSH_TYPE(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift) + +/*define for l2_ptes field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift 19 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift) + +/*define for l2_pde0 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift 20 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE0(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift) + +/*define for l2_pde1 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift 21 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE1(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift) + +/*define for l2_pde2 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift 22 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE2(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift) + +/*define for l1_ptes field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift 23 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L1_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift) + +/*define for clr_protection_fault_status_addr field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift 24 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_CLR_PROTECTION_FAULT_STATUS_ADDR(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift) + +/*define for log_request field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift 25 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_LOG_REQUEST(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift) + +/*define for four_kilobytes field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift 26 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FOUR_KILOBYTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift) + +/*define for PAYLOAD2 word*/ +/*define for s field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_offset 2 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift 0 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_S(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift) + +/*define for page_va_42_12 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_offset 2 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask 0x7FFFFFFF +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_PAGE_VA_42_12(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift) + +/*define for PAYLOAD3 word*/ +/*define for page_va_47_43 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_offset 3 +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask 0x0000003F +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift 0 +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_PAGE_VA_47_43(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift) + + +/* +** Definitions for SDMA_PKT_GCR_REQ packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_GCR_REQ_HEADER_op_offset 0 +#define SDMA_PKT_GCR_REQ_HEADER_op_mask 0x000000FF +#define SDMA_PKT_GCR_REQ_HEADER_op_shift 0 +#define SDMA_PKT_GCR_REQ_HEADER_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_op_mask) << SDMA_PKT_GCR_REQ_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_GCR_REQ_HEADER_sub_op_offset 0 +#define SDMA_PKT_GCR_REQ_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_GCR_REQ_HEADER_sub_op_shift 8 +#define SDMA_PKT_GCR_REQ_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_sub_op_mask) << SDMA_PKT_GCR_REQ_HEADER_sub_op_shift) + +/*define for PAYLOAD1 word*/ +/*define for base_va_31_7 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_offset 1 +#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask 0x01FFFFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift 7 +#define SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift) + +/*define for PAYLOAD2 word*/ +/*define for base_va_56_32 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_56_32_offset 2 +#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_56_32_mask 0x00FFFFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_56_32_shift 0 +#define SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_56_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_56_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_56_32_shift) + +/*define for PAYLOAD3 word*/ +/*define for gcr_control_18_0 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_0_offset 3 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_0_mask 0x0007FFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_0_shift 0 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_0(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_0_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_0_shift) + +/*define for limit_va_15_7 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_15_7_offset 3 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_15_7_mask 0x000001FF +#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_15_7_shift 23 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_15_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_15_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_15_7_shift) + +/*define for PAYLOAD4 word*/ +/*define for limit_va_47_16 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_16_offset 4 +#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_16_mask 0xFFFFFFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_16_shift 0 +#define SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_16(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_16_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_16_shift) + +/*define for PAYLOAD5 word*/ +/*define for limit_va_56_48 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD5_limit_va_56_48_offset 5 +#define SDMA_PKT_GCR_REQ_PAYLOAD5_limit_va_56_48_mask 0x000001FF +#define SDMA_PKT_GCR_REQ_PAYLOAD5_limit_va_56_48_shift 0 +#define SDMA_PKT_GCR_REQ_PAYLOAD5_LIMIT_VA_56_48(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD5_limit_va_56_48_mask) << SDMA_PKT_GCR_REQ_PAYLOAD5_limit_va_56_48_shift) + +/*define for vmid field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD5_vmid_offset 5 +#define SDMA_PKT_GCR_REQ_PAYLOAD5_vmid_mask 0x0000000F +#define SDMA_PKT_GCR_REQ_PAYLOAD5_vmid_shift 26 +#define SDMA_PKT_GCR_REQ_PAYLOAD5_VMID(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD5_vmid_mask) << SDMA_PKT_GCR_REQ_PAYLOAD5_vmid_shift) + + +/* +** Definitions for SDMA_PKT_NOP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_NOP_HEADER_op_offset 0 +#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_NOP_HEADER_op_shift 0 +#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_NOP_HEADER_sub_op_offset 0 +#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_NOP_HEADER_sub_op_shift 8 +#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift) + +/*define for count field*/ +#define SDMA_PKT_NOP_HEADER_count_offset 0 +#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF +#define SDMA_PKT_NOP_HEADER_count_shift 16 +#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_NOP_DATA0_data0_offset 1 +#define SDMA_PKT_NOP_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_NOP_DATA0_data0_shift 0 +#define SDMA_PKT_NOP_DATA0_DATA0(x) (((x) & SDMA_PKT_NOP_DATA0_data0_mask) << SDMA_PKT_NOP_DATA0_data0_shift) + + +/* +** Definitions for SDMA_AQL_PKT_HEADER packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_format_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_HEADER_HEADER_format_shift 0 +#define SDMA_AQL_PKT_HEADER_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_format_mask) << SDMA_AQL_PKT_HEADER_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_HEADER_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_barrier_mask) << SDMA_AQL_PKT_HEADER_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_HEADER_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_HEADER_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_HEADER_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_reserved_mask) << SDMA_AQL_PKT_HEADER_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_op_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_HEADER_HEADER_op_shift 16 +#define SDMA_AQL_PKT_HEADER_HEADER_OP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_op_mask) << SDMA_AQL_PKT_HEADER_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_HEADER_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_HEADER_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_subop_mask) << SDMA_AQL_PKT_HEADER_HEADER_subop_shift) + +/*define for cpv field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_cpv_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_cpv_mask 0x00000001 +#define SDMA_AQL_PKT_HEADER_HEADER_cpv_shift 28 +#define SDMA_AQL_PKT_HEADER_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_cpv_mask) << SDMA_AQL_PKT_HEADER_HEADER_cpv_shift) + + +/* +** Definitions for SDMA_AQL_PKT_COPY_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift 16 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift) + +/*define for cpv field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_mask 0x00000001 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_shift 28 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_shift) + +/*define for RESERVED_DW1 word*/ +/*define for reserved_dw1 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_offset 1 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift) + +/*define for RETURN_ADDR_LO word*/ +/*define for return_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_offset 2 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_RETURN_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift) + +/*define for RETURN_ADDR_HI word*/ +/*define for return_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_offset 3 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_RETURN_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_offset 4 +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 5 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift) + +/*define for dst_cache_policy field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_offset 5 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask 0x00000007 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift 18 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_CACHE_POLICY(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift) + +/*define for src_sw field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 5 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift) + +/*define for src_cache_policy field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_offset 5 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift 26 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 6 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 7 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 8 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 9 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for RESERVED_DW10 word*/ +/*define for reserved_dw10 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_offset 10 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_RESERVED_DW10(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift) + +/*define for RESERVED_DW11 word*/ +/*define for reserved_dw11 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_offset 11 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_RESERVED_DW11(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift) + +/*define for RESERVED_DW12 word*/ +/*define for reserved_dw12 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_offset 12 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift) + +/*define for RESERVED_DW13 word*/ +/*define for reserved_dw13 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_offset 13 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift) + +/*define for COMPLETION_SIGNAL_LO word*/ +/*define for completion_signal_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift) + +/*define for COMPLETION_SIGNAL_HI word*/ +/*define for completion_signal_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) + + +/* +** Definitions for SDMA_AQL_PKT_BARRIER_OR packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift 16 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift) + +/*define for cpv field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_mask 0x00000001 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_shift 28 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_shift) + +/*define for RESERVED_DW1 word*/ +/*define for reserved_dw1 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_offset 1 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift) + +/*define for DEPENDENT_ADDR_0_LO word*/ +/*define for dependent_addr_0_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_offset 2 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_DEPENDENT_ADDR_0_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift) + +/*define for DEPENDENT_ADDR_0_HI word*/ +/*define for dependent_addr_0_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_offset 3 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_DEPENDENT_ADDR_0_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift) + +/*define for DEPENDENT_ADDR_1_LO word*/ +/*define for dependent_addr_1_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_offset 4 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_DEPENDENT_ADDR_1_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift) + +/*define for DEPENDENT_ADDR_1_HI word*/ +/*define for dependent_addr_1_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_offset 5 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_DEPENDENT_ADDR_1_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift) + +/*define for DEPENDENT_ADDR_2_LO word*/ +/*define for dependent_addr_2_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_offset 6 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_DEPENDENT_ADDR_2_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift) + +/*define for DEPENDENT_ADDR_2_HI word*/ +/*define for dependent_addr_2_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_offset 7 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_DEPENDENT_ADDR_2_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift) + +/*define for DEPENDENT_ADDR_3_LO word*/ +/*define for dependent_addr_3_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_offset 8 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_DEPENDENT_ADDR_3_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift) + +/*define for DEPENDENT_ADDR_3_HI word*/ +/*define for dependent_addr_3_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_offset 9 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_DEPENDENT_ADDR_3_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift) + +/*define for DEPENDENT_ADDR_4_LO word*/ +/*define for dependent_addr_4_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_offset 10 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_DEPENDENT_ADDR_4_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift) + +/*define for DEPENDENT_ADDR_4_HI word*/ +/*define for dependent_addr_4_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_offset 11 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_DEPENDENT_ADDR_4_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift) + +/*define for CACHE_POLICY word*/ +/*define for cache_policy0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_offset 12 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_shift) + +/*define for cache_policy1 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_offset 12 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_shift 5 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_shift) + +/*define for cache_policy2 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_offset 12 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_shift 10 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY2(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_shift) + +/*define for cache_policy3 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_offset 12 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_shift 15 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY3(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_shift) + +/*define for cache_policy4 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_offset 12 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_shift 20 +#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY4(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_shift) + +/*define for RESERVED_DW13 word*/ +/*define for reserved_dw13 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_offset 13 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift) + +/*define for COMPLETION_SIGNAL_LO word*/ +/*define for completion_signal_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift) + +/*define for COMPLETION_SIGNAL_HI word*/ +/*define for completion_signal_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) + + +#endif /* __SDMA_V7_1_0_PKT_OPEN_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index f7288372ee61..509d43b238f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2003,10 +2003,6 @@ static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) return 0; } -static void si_pre_asic_init(struct amdgpu_device *adev) -{ -} - static const struct amdgpu_asic_funcs si_asic_funcs = { .read_disabled_bios = &si_read_disabled_bios, @@ -2028,7 +2024,6 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .need_reset_on_init = &si_need_reset_on_init, .get_pcie_replay_count = &si_get_pcie_replay_count, .supports_baco = &si_asic_supports_baco, - .pre_asic_init = &si_pre_asic_init, .query_video_codecs = &si_query_video_codecs, }; @@ -2260,16 +2255,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { if (current_data_rate == 2) { - DRM_INFO("PCIE gen 3 link speeds already enabled\n"); + drm_info(adev_to_drm(adev), "PCIE gen 3 link speeds already enabled\n"); return; } - DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n"); + drm_info(adev_to_drm(adev), "enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n"); } else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) { if (current_data_rate == 1) { - DRM_INFO("PCIE gen 2 link speeds already enabled\n"); + drm_info(adev_to_drm(adev), "PCIE gen 2 link speeds already enabled\n"); return; } - DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n"); + drm_info(adev_to_drm(adev), "enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n"); } if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev)) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 7f18e4875287..74fcaa340d9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -584,7 +584,7 @@ static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block) static int si_dma_soft_reset(struct amdgpu_ip_block *ip_block) { - DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n"); + drm_info(adev_to_drm(ip_block->adev), "si_dma_soft_reset --- not implemented !!!!!!!\n"); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index 68aef47254a9..c86acee05d0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -196,7 +196,9 @@ static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control) if (REG_GET_FIELD(reg, CKSVII2C_IC_INTR_STAT, R_TX_ABRT) == 1) { reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE); - DRM_INFO("TX was terminated, IC_TX_ABRT_SOURCE val is:%x", reg_c_tx_abrt_source); + drm_info(adev_to_drm(adev), + "TX was terminated, IC_TX_ABRT_SOURCE val is:%x", + reg_c_tx_abrt_source); /* Check for stop due to NACK */ if (REG_GET_FIELD(reg_c_tx_abrt_source, @@ -769,7 +771,7 @@ bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control) uint8_t data[6] = {0xf, 0, 0xde, 0xad, 0xbe, 0xef}; - DRM_INFO("Begin"); + drm_info(adev_to_drm(adev), "Begin"); if (!smu_v11_0_i2c_bus_lock(control)) { DRM_ERROR("Failed to lock the bus!."); @@ -788,7 +790,7 @@ bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control) smu_v11_0_i2c_bus_unlock(control); - DRM_INFO("End"); + drm_info(adev_to_drm(adev), "End"); return true; } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.c new file mode 100644 index 000000000000..eccc76650d82 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.c @@ -0,0 +1,50 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "smuio_v15_0_0.h" +#include "smuio/smuio_15_0_0_offset.h" +#include "smuio/smuio_15_0_0_sh_mask.h" +#include <linux/preempt.h> + +static u64 smuio_v15_0_0_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + u64 clock; + u64 clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; + + preempt_disable(); + clock_counter_hi_pre = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + /* the clock counter may be udpated during polling the counters */ + clock_counter_hi_after = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + preempt_enable(); + + clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); + + return clock; +} + +const struct amdgpu_smuio_funcs smuio_v15_0_0_funcs = { + .get_gpu_clock_counter = smuio_v15_0_0_get_gpu_clock_counter, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.h new file mode 100644 index 000000000000..85e0f08283d9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMUIO_V15_0_0_H__ +#define __SMUIO_V15_0_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v15_0_0_funcs; + +#endif /* __SMUIO_V15_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.c b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.c new file mode 100644 index 000000000000..ef29424c26cc --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.c @@ -0,0 +1,213 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "smuio_v15_0_8.h" +#include "smuio/smuio_15_0_8_offset.h" +#include "smuio/smuio_15_0_8_sh_mask.h" + +#define SMUIO_MCM_CONFIG__HOST_GPU_XGMI_MASK 0x00000001L +#define SMUIO_MCM_CONFIG__ETHERNET_SWITCH_MASK 0x00000008L +#define SMUIO_MCM_CONFIG__CUSTOM_HBM_MASK 0x00000001L + +static u32 smuio_v15_0_8_get_rom_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX); +} + +static u32 smuio_v15_0_8_get_rom_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA); +} + +static void smuio_v15_0_8_update_rom_clock_gating(struct amdgpu_device *adev, bool enable) +{ + return; +} + +static u64 smuio_v15_0_8_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + u64 clock; + u64 clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; + + preempt_disable(); + clock_counter_hi_pre = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + /* the clock counter may be udpated during polling the counters */ + clock_counter_hi_after = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + preempt_enable(); + + clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); + + return clock; +} + +static void smuio_v15_0_8_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) +{ + u32 data; + + /* CGTT_ROM_CLK_CTRL0 is not available for APU */ + if (adev->flags & AMD_IS_APU) + return; + + data = RREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0); + if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) + *flags |= AMD_CG_SUPPORT_ROM_MGCG; +} + +/** + * smuio_v15_0_8_get_die_id - query die id from FCH. + * + * @adev: amdgpu device pointer + * + * Returns die id + */ +static u32 smuio_v15_0_8_get_die_id(struct amdgpu_device *adev) +{ + u32 data, die_id; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + die_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, DIE_ID); + + return die_id; +} + +/** + * smuio_v15_0_8_get_socket_id - query socket id from FCH + * + * @adev: amdgpu device pointer + * + * Returns socket id + */ +static u32 smuio_v15_0_8_get_socket_id(struct amdgpu_device *adev) +{ + u32 data, socket_id; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID); + + return socket_id; +} + +/** + * smuio_v15_0_8_is_host_gpu_xgmi_supported - detect xgmi interface between cpu and gpu/s. + * + * @adev: amdgpu device pointer + * + * Returns true on success or false otherwise. + */ +static bool smuio_v15_0_8_is_host_gpu_xgmi_supported(struct amdgpu_device *adev) +{ + u32 data; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, TOPOLOGY_ID); + /* data[4:0] + * bit 0 == 0 host-gpu interface is PCIE + * bit 0 == 1 host-gpu interface is Alternate Protocal + * for AMD, this is XGMI + */ + data &= SMUIO_MCM_CONFIG__HOST_GPU_XGMI_MASK; + + return data ? true : false; +} + +#if 0 +/* + * smuio_v15_0_8_is_connected_with_ethernet_switch - detect systems connected with ethernet switch + * + * @adev: amdgpu device pointer + * + * Returns true on success or false otherwise. + */ +static bool smuio_v15_0_8_is_connected_with_ethernet_switch(struct amdgpu_device *adev) +{ + u32 data; + + if (!(adev->flags & AMD_IS_APU)) + return false; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, TOPOLOGY_ID); + /* data[4:0] + * bit 3 == 0 systems connected with ethernet switch + */ + data &= SMUIO_MCM_CONFIG__ETHERNET_SWITCH_MASK; + + return data ? false : true; +} +#endif + +static enum amdgpu_pkg_type smuio_v15_0_8_get_pkg_type(struct amdgpu_device *adev) +{ + enum amdgpu_pkg_type pkg_type; + u32 data; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, PKG_TYPE); + + /* data [3:0] + bit 2 and bit 3 identifies the pkg type */ + switch (data & 0xC) { + case 0x0: + pkg_type = AMDGPU_PKG_TYPE_BB; + break; + case 0x8: + pkg_type = AMDGPU_PKG_TYPE_CEM; + break; + default: + pkg_type = AMDGPU_PKG_TYPE_UNKNOWN; + break; + } + + return pkg_type; +} + +#if 0 +static bool smuio_v15_0_8_is_custom_hbm_supported(struct amdgpu_device *adev) +{ + u32 data; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, PKG_TYPE); + + /* data [3:0] + * bit 0 identifies custom HBM module */ + data &= SMUIO_MCM_CONFIG__CUSTOM_HBM_MASK; + + return data ? true : false; +} +#endif + +const struct amdgpu_smuio_funcs smuio_v15_0_8_funcs = { + .get_rom_index_offset = smuio_v15_0_8_get_rom_index_offset, + .get_rom_data_offset = smuio_v15_0_8_get_rom_data_offset, + .get_gpu_clock_counter = smuio_v15_0_8_get_gpu_clock_counter, + .get_die_id = smuio_v15_0_8_get_die_id, + .get_socket_id = smuio_v15_0_8_get_socket_id, + .is_host_gpu_xgmi_supported = smuio_v15_0_8_is_host_gpu_xgmi_supported, + .update_rom_clock_gating = smuio_v15_0_8_update_rom_clock_gating, + .get_clock_gating_state = smuio_v15_0_8_get_clock_gating_state, + .get_pkg_type = smuio_v15_0_8_get_pkg_type, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.h b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.h new file mode 100644 index 000000000000..508547fcce03 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.h @@ -0,0 +1,30 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMUIO_V15_0_8_H__ +#define __SMUIO_V15_0_8_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v15_0_8_funcs; + +#endif /* __SMUIO_V15_0_8_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 42f5d9c0e3af..54b14751fd7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -951,7 +951,6 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = .need_reset_on_init = &soc15_need_reset_on_init, .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count, .supports_baco = &soc15_supports_baco, - .pre_asic_init = &soc15_pre_asic_init, .query_video_codecs = &soc15_query_video_codecs, .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing, .get_reg_state = &aqua_vanjaram_get_reg_state, diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index ad36c96478a8..2da733b45c21 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -141,6 +141,31 @@ static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn1 = { .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1, }; +static const struct amdgpu_video_codec_info vcn_5_3_0_video_codecs_encode_array_vcn0[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs vcn_5_3_0_video_codecs_encode_vcn0 = { + .codec_count = ARRAY_SIZE(vcn_5_3_0_video_codecs_encode_array_vcn0), + .codec_array = vcn_5_3_0_video_codecs_encode_array_vcn0, +}; + +static const struct amdgpu_video_codec_info vcn_5_3_0_video_codecs_decode_array_vcn0[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs vcn_5_3_0_video_codecs_decode_vcn0 = { + .codec_count = ARRAY_SIZE(vcn_5_3_0_video_codecs_decode_array_vcn0), + .codec_array = vcn_5_3_0_video_codecs_decode_array_vcn0, +}; + + static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -185,6 +210,12 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, else *codecs = &vcn_4_0_0_video_codecs_decode_vcn0; return 0; + case IP_VERSION(5, 3, 0): + if (encode) + *codecs = &vcn_5_3_0_video_codecs_encode_vcn0; + else + *codecs = &vcn_5_3_0_video_codecs_decode_vcn0; + return 0; default: return -EINVAL; } @@ -513,10 +544,6 @@ static void soc21_init_doorbell_index(struct amdgpu_device *adev) adev->doorbell_index.sdma_doorbell_range = 20; } -static void soc21_pre_asic_init(struct amdgpu_device *adev) -{ -} - static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev, bool enter) { @@ -546,7 +573,6 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = { .need_reset_on_init = &soc21_need_reset_on_init, .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count, .supports_baco = &amdgpu_dpm_is_baco_supported, - .pre_asic_init = &soc21_pre_asic_init, .query_video_codecs = &soc21_query_video_codecs, .update_umd_stable_pstate = &soc21_update_umd_stable_pstate, }; @@ -804,6 +830,13 @@ static int soc21_common_early_init(struct amdgpu_ip_block *ip_block) AMD_PG_SUPPORT_GFX_PG; adev->external_rev_id = adev->rev_id + 0x50; break; + case IP_VERSION(11, 5, 4): + adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG; + adev->external_rev_id = adev->rev_id + 0x1; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -965,6 +998,7 @@ static int soc21_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, case IP_VERSION(7, 11, 1): case IP_VERSION(7, 11, 2): case IP_VERSION(7, 11, 3): + case IP_VERSION(7, 11, 4): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index 972b449ab89f..ecb6c3fcfbd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -327,10 +327,6 @@ static void soc24_init_doorbell_index(struct amdgpu_device *adev) adev->doorbell_index.sdma_doorbell_range = 20; } -static void soc24_pre_asic_init(struct amdgpu_device *adev) -{ -} - static int soc24_update_umd_stable_pstate(struct amdgpu_device *adev, bool enter) { @@ -357,7 +353,6 @@ static const struct amdgpu_asic_funcs soc24_asic_funcs = { .need_reset_on_init = &soc24_need_reset_on_init, .get_pcie_replay_count = &soc24_get_pcie_replay_count, .supports_baco = &amdgpu_dpm_is_baco_supported, - .pre_asic_init = &soc24_pre_asic_init, .query_video_codecs = &soc24_query_video_codecs, .update_umd_stable_pstate = &soc24_update_umd_stable_pstate, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c new file mode 100644 index 000000000000..59ab952d5cce --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c @@ -0,0 +1,862 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "soc15.h" +#include "soc15_common.h" +#include "soc_v1_0.h" +#include "amdgpu_ip.h" +#include "amdgpu_imu.h" +#include "gfxhub_v12_1.h" +#include "sdma_v7_1.h" +#include "gfx_v12_1.h" + +#include "gc/gc_12_1_0_offset.h" +#include "gc/gc_12_1_0_sh_mask.h" +#include "mp/mp_15_0_8_offset.h" + +#define XCC_REG_RANGE_0_LOW 0x1260 /* XCC gfxdec0 lower Bound */ +#define XCC_REG_RANGE_0_HIGH 0x3C00 /* XCC gfxdec0 upper Bound */ +#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */ +#define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper Bound */ +#define NORMALIZE_XCC_REG_OFFSET(offset) \ + (offset & 0xFFFF) + +/* Initialized doorbells for amdgpu including multimedia + * KFD can use all the rest in 2M doorbell bar */ +static void soc_v1_0_doorbell_index_init(struct amdgpu_device *adev) +{ + int i; + + adev->doorbell_index.kiq = AMDGPU_SOC_V1_0_DOORBELL_KIQ_START; + + adev->doorbell_index.mec_ring0 = AMDGPU_SOC_V1_0_DOORBELL_MEC_RING_START; + adev->doorbell_index.mes_ring0 = AMDGPU_SOC_V1_0_DOORBELL_MES_RING0; + adev->doorbell_index.mes_ring1 = AMDGPU_SOC_V1_0_DOORBELL_MES_RING1; + + adev->doorbell_index.userqueue_start = AMDGPU_SOC_V1_0_DOORBELL_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_SOC_V1_0_DOORBELL_USERQUEUE_END; + adev->doorbell_index.xcc_doorbell_range = AMDGPU_SOC_V1_0_DOORBELL_XCC_RANGE; + + adev->doorbell_index.sdma_doorbell_range = 20; + for (i = 0; i < adev->sdma.num_instances; i++) + adev->doorbell_index.sdma_engine[i] = + AMDGPU_SOC_V1_0_DOORBELL_sDMA_ENGINE_START + + i * (adev->doorbell_index.sdma_doorbell_range >> 1); + + adev->doorbell_index.ih = AMDGPU_SOC_V1_0_DOORBELL_IH; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_SOC_V1_0_DOORBELL_VCN_START; + + adev->doorbell_index.first_non_cp = AMDGPU_SOC_V1_0_DOORBELL_FIRST_NON_CP; + adev->doorbell_index.last_non_cp = AMDGPU_SOC_V1_0_DOORBELL_LAST_NON_CP; + + adev->doorbell_index.max_assignment = AMDGPU_SOC_V1_0_DOORBELL_MAX_ASSIGNMENT << 1; +} + +/* Fixed pattern for upper 32bits smn addressing. + * bit[47:40]: Socket ID + * bit[39:34]: Die ID + * bit[32]: local or remote die in same socket + * The ext_id is comprised of socket_id and die_id. + * ext_id = (socket_id << 6) | (die_id) +*/ +u64 soc_v1_0_encode_ext_smn_addressing(int ext_id) +{ + u64 ext_offset; + int socket_id, die_id; + + /* local die routing for MID0 on local socket */ + if (ext_id == 0) + return 0; + + die_id = ext_id & 0x3; + socket_id = (ext_id >> 6) & 0xff; + + /* Initiated from host, accessing to non-MID0 is cross-die traffic */ + if (socket_id == 0) + ext_offset = ((u64)die_id << 34) | (1ULL << 32); + else if (socket_id != 0 && die_id != 0) + ext_offset = ((u64)socket_id << 40) | ((u64)die_id << 34) | + (3ULL << 32); + else + ext_offset = ((u64)socket_id << 40) | (1ULL << 33); + + return ext_offset; +} + +static u32 soc_v1_0_get_config_memsize(struct amdgpu_device *adev) +{ + return adev->nbio.funcs->get_memsize(adev); +} + +static u32 soc_v1_0_get_xclk(struct amdgpu_device *adev) +{ + return adev->clock.spll.reference_freq; +} + +void soc_v1_0_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, + u32 queue, u32 vmid, + int xcc_id) +{ + u32 grbm_gfx_cntl = 0; + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); + + WREG32_SOC15_RLC_SHADOW(GC, xcc_id, regGRBM_GFX_CNTL, grbm_gfx_cntl); +} + +static struct soc15_allowed_register_entry soc_v1_0_allowed_read_registers[] = { + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS) }, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS2) }, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS3) }, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE0) }, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE1) }, + { SOC15_REG_ENTRY(GC, 0, regCP_STAT) }, + { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT1) }, + { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT2) }, + { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT3) }, + { SOC15_REG_ENTRY(GC, 0, regCP_CPF_BUSY_STAT) }, + { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STALLED_STAT1) }, + { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STATUS) }, + { SOC15_REG_ENTRY(GC, 0, regCP_CPC_BUSY_STAT) }, + { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STALLED_STAT1) }, + { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STATUS) }, + { SOC15_REG_ENTRY(GC, 0, regGB_ADDR_CONFIG_1) }, +}; + +static uint32_t soc_v1_0_read_indexed_register(struct amdgpu_device *adev, + u32 se_num, + u32 sh_num, + u32 reg_offset) +{ + uint32_t val; + + mutex_lock(&adev->grbm_idx_mutex); + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); + + val = RREG32(reg_offset); + + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); + mutex_unlock(&adev->grbm_idx_mutex); + return val; +} + +static uint32_t soc_v1_0_get_register_value(struct amdgpu_device *adev, + bool indexed, u32 se_num, + u32 sh_num, u32 reg_offset) +{ + if (indexed) { + return soc_v1_0_read_indexed_register(adev, se_num, sh_num, reg_offset); + } else { + if (reg_offset == SOC15_REG_OFFSET(GC, 0, regGB_ADDR_CONFIG_1) && + adev->gfx.config.gb_addr_config) + return adev->gfx.config.gb_addr_config; + return RREG32(reg_offset); + } +} + +static int soc_v1_0_read_register(struct amdgpu_device *adev, + u32 se_num, u32 sh_num, + u32 reg_offset, u32 *value) +{ + uint32_t i; + struct soc15_allowed_register_entry *en; + + *value = 0; + for (i = 0; i < ARRAY_SIZE(soc_v1_0_allowed_read_registers); i++) { + en = &soc_v1_0_allowed_read_registers[i]; + if (!adev->reg_offset[en->hwip][en->inst]) + continue; + else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] + + en->reg_offset)) + continue; + + *value = soc_v1_0_get_register_value(adev, + soc_v1_0_allowed_read_registers[i].grbm_indexed, + se_num, sh_num, reg_offset); + return 0; + } + return -EINVAL; +} + +static bool soc_v1_0_need_full_reset(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 1, 0): + default: + return true; + } +} + +static bool soc_v1_0_need_reset_on_init(struct amdgpu_device *adev) +{ + u32 sol_reg; + + if (adev->flags & AMD_IS_APU) + return false; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81); + if (sol_reg) + return true; + + return false; +} + +static int soc_v1_0_asic_reset(struct amdgpu_device *adev) +{ + return 0; +} + +static const struct amdgpu_asic_funcs soc_v1_0_asic_funcs = { + .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom, + .read_register = &soc_v1_0_read_register, + .get_config_memsize = &soc_v1_0_get_config_memsize, + .get_xclk = &soc_v1_0_get_xclk, + .need_full_reset = &soc_v1_0_need_full_reset, + .init_doorbell_index = &soc_v1_0_doorbell_index_init, + .need_reset_on_init = &soc_v1_0_need_reset_on_init, + .encode_ext_smn_addressing = &soc_v1_0_encode_ext_smn_addressing, + .reset = soc_v1_0_asic_reset, +}; + +static int soc_v1_0_common_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + adev->smc_rreg = NULL; + adev->smc_wreg = NULL; + adev->pcie_rreg = &amdgpu_device_indirect_rreg; + adev->pcie_wreg = &amdgpu_device_indirect_wreg; + adev->pcie_rreg_ext = &amdgpu_device_indirect_rreg_ext; + adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext; + adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64; + adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64; + adev->pciep_rreg = amdgpu_device_pcie_port_rreg; + adev->pciep_wreg = amdgpu_device_pcie_port_wreg; + adev->pcie_rreg64_ext = &amdgpu_device_indirect_rreg64_ext; + adev->pcie_wreg64_ext = &amdgpu_device_indirect_wreg64_ext; + adev->uvd_ctx_rreg = NULL; + adev->uvd_ctx_wreg = NULL; + adev->didt_rreg = NULL; + adev->didt_wreg = NULL; + + adev->asic_funcs = &soc_v1_0_asic_funcs; + + adev->rev_id = amdgpu_device_get_rev_id(adev); + adev->external_rev_id = 0xff; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 1, 0): + adev->cg_flags = 0; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x50; + break; + default: + /* FIXME: not supported yet */ + return -EINVAL; + } + + return 0; +} + +static int soc_v1_0_common_late_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + /* Enable selfring doorbell aperture late because doorbell BAR + * aperture will change if resize BAR successfully in gmc sw_init. + */ + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true); + + return 0; +} + +static int soc_v1_0_common_sw_init(struct amdgpu_ip_block *ip_block) +{ + return 0; +} + +static int soc_v1_0_common_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + /* enable the doorbell aperture */ + adev->nbio.funcs->enable_doorbell_aperture(adev, true); + + return 0; +} + +static int soc_v1_0_common_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + adev->nbio.funcs->enable_doorbell_aperture(adev, false); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); + + return 0; +} + +static int soc_v1_0_common_suspend(struct amdgpu_ip_block *ip_block) +{ + return soc_v1_0_common_hw_fini(ip_block); +} + +static int soc_v1_0_common_resume(struct amdgpu_ip_block *ip_block) +{ + return soc_v1_0_common_hw_init(ip_block); +} + +static bool soc_v1_0_common_is_idle(struct amdgpu_ip_block *ip_block) +{ + return true; +} + +static int soc_v1_0_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + return 0; +} + +static int soc_v1_0_common_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) +{ + return 0; +} + +static void soc_v1_0_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, + u64 *flags) +{ + return; +} + +static const struct amd_ip_funcs soc_v1_0_common_ip_funcs = { + .name = "soc_v1_0_common", + .early_init = soc_v1_0_common_early_init, + .late_init = soc_v1_0_common_late_init, + .sw_init = soc_v1_0_common_sw_init, + .hw_init = soc_v1_0_common_hw_init, + .hw_fini = soc_v1_0_common_hw_fini, + .suspend = soc_v1_0_common_suspend, + .resume = soc_v1_0_common_resume, + .is_idle = soc_v1_0_common_is_idle, + .set_clockgating_state = soc_v1_0_common_set_clockgating_state, + .set_powergating_state = soc_v1_0_common_set_powergating_state, + .get_clockgating_state = soc_v1_0_common_get_clockgating_state, +}; + +const struct amdgpu_ip_block_version soc_v1_0_common_ip_block = { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &soc_v1_0_common_ip_funcs, +}; + +static enum amdgpu_gfx_partition __soc_v1_0_calc_xcp_mode(struct amdgpu_xcp_mgr *xcp_mgr) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int num_xcc, num_xcc_per_xcp = 0, mode = 0; + + num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask); + if (adev->gfx.funcs && + adev->gfx.funcs->get_xccs_per_xcp) + num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev); + if ((num_xcc_per_xcp) && (num_xcc % num_xcc_per_xcp == 0)) + mode = num_xcc / num_xcc_per_xcp; + + if (num_xcc_per_xcp == 1) + return AMDGPU_CPX_PARTITION_MODE; + + switch (mode) { + case 1: + return AMDGPU_SPX_PARTITION_MODE; + case 2: + return AMDGPU_DPX_PARTITION_MODE; + case 3: + return AMDGPU_TPX_PARTITION_MODE; + case 4: + return AMDGPU_QPX_PARTITION_MODE; + default: + return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; + } + + return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; +} + +static int soc_v1_0_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) +{ + enum amdgpu_gfx_partition derv_mode, mode; + struct amdgpu_device *adev = xcp_mgr->adev; + + mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; + derv_mode = __soc_v1_0_calc_xcp_mode(xcp_mgr); + + if (amdgpu_sriov_vf(adev) || !adev->psp.funcs) + return derv_mode; + + if (adev->nbio.funcs && + adev->nbio.funcs->get_compute_partition_mode) { + mode = adev->nbio.funcs->get_compute_partition_mode(adev); + if (mode != derv_mode) + dev_warn(adev->dev, + "Mismatch in compute partition mode - reported : %d derived : %d", + mode, derv_mode); + } + + return mode; +} + +static int __soc_v1_0_get_xcc_per_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int mode) +{ + int num_xcc, num_xcc_per_xcp = 0; + + num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask); + + switch (mode) { + case AMDGPU_SPX_PARTITION_MODE: + num_xcc_per_xcp = num_xcc; + break; + case AMDGPU_DPX_PARTITION_MODE: + num_xcc_per_xcp = num_xcc / 2; + break; + case AMDGPU_TPX_PARTITION_MODE: + num_xcc_per_xcp = num_xcc / 3; + break; + case AMDGPU_QPX_PARTITION_MODE: + num_xcc_per_xcp = num_xcc / 4; + break; + case AMDGPU_CPX_PARTITION_MODE: + num_xcc_per_xcp = 1; + break; + } + + return num_xcc_per_xcp; +} + +static int __soc_v1_0_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id, + enum AMDGPU_XCP_IP_BLOCK ip_id, + struct amdgpu_xcp_ip *ip) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int num_sdma, num_vcn, num_shared_vcn, num_xcp; + int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp; + + num_sdma = adev->sdma.num_instances; + num_vcn = adev->vcn.num_vcn_inst; + num_shared_vcn = 1; + + num_xcc_xcp = adev->gfx.num_xcc_per_xcp; + num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp; + + switch (xcp_mgr->mode) { + case AMDGPU_SPX_PARTITION_MODE: + case AMDGPU_DPX_PARTITION_MODE: + case AMDGPU_TPX_PARTITION_MODE: + case AMDGPU_QPX_PARTITION_MODE: + case AMDGPU_CPX_PARTITION_MODE: + num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp); + num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp); + break; + default: + return -EINVAL; + } + + if (num_vcn && num_xcp > num_vcn) + num_shared_vcn = num_xcp / num_vcn; + + switch (ip_id) { + case AMDGPU_XCP_GFXHUB: + ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id); + ip->ip_funcs = &gfxhub_v12_1_xcp_funcs; + break; + case AMDGPU_XCP_GFX: + ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id); + ip->ip_funcs = &gfx_v12_1_xcp_funcs; + break; + case AMDGPU_XCP_SDMA: + ip->inst_mask = XCP_INST_MASK(num_sdma_xcp, xcp_id); + ip->ip_funcs = &sdma_v7_1_xcp_funcs; + break; + case AMDGPU_XCP_VCN: + ip->inst_mask = + XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn); + /* TODO : Assign IP funcs */ + break; + default: + return -EINVAL; + } + + ip->ip_id = ip_id; + + return 0; +} + +static int soc_v1_0_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, + int mode, + struct amdgpu_xcp_cfg *xcp_cfg) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int max_res[AMDGPU_XCP_RES_MAX] = {}; + bool res_lt_xcp; + int num_xcp, i; + u16 nps_modes; + + if (!(xcp_mgr->supp_xcp_modes & BIT(mode))) + return -EINVAL; + + max_res[AMDGPU_XCP_RES_XCC] = NUM_XCC(adev->gfx.xcc_mask); + max_res[AMDGPU_XCP_RES_DMA] = adev->sdma.num_instances; + max_res[AMDGPU_XCP_RES_DEC] = adev->vcn.num_vcn_inst; + max_res[AMDGPU_XCP_RES_JPEG] = adev->jpeg.num_jpeg_inst; + + switch (mode) { + case AMDGPU_SPX_PARTITION_MODE: + num_xcp = 1; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); + break; + case AMDGPU_DPX_PARTITION_MODE: + num_xcp = 2; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); + break; + case AMDGPU_TPX_PARTITION_MODE: + num_xcp = 3; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); + break; + case AMDGPU_QPX_PARTITION_MODE: + num_xcp = 4; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); + break; + case AMDGPU_CPX_PARTITION_MODE: + num_xcp = NUM_XCC(adev->gfx.xcc_mask); + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); + break; + default: + return -EINVAL; + } + + xcp_cfg->compatible_nps_modes = + (adev->gmc.supported_nps_modes & nps_modes); + xcp_cfg->num_res = ARRAY_SIZE(max_res); + + for (i = 0; i < xcp_cfg->num_res; i++) { + res_lt_xcp = max_res[i] < num_xcp; + xcp_cfg->xcp_res[i].id = i; + xcp_cfg->xcp_res[i].num_inst = + res_lt_xcp ? 1 : max_res[i] / num_xcp; + xcp_cfg->xcp_res[i].num_inst = + i == AMDGPU_XCP_RES_JPEG ? + xcp_cfg->xcp_res[i].num_inst * + adev->jpeg.num_jpeg_rings : xcp_cfg->xcp_res[i].num_inst; + xcp_cfg->xcp_res[i].num_shared = + res_lt_xcp ? num_xcp / max_res[i] : 1; + } + + return 0; +} + +static enum amdgpu_gfx_partition __soc_v1_0_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int num_xcc; + + num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask); + + if (adev->gmc.num_mem_partitions == 1) + return AMDGPU_SPX_PARTITION_MODE; + + if (adev->gmc.num_mem_partitions == num_xcc) + return AMDGPU_CPX_PARTITION_MODE; + + if (adev->gmc.num_mem_partitions == 2) + return AMDGPU_DPX_PARTITION_MODE; + + return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; +} + +static bool __soc_v1_0_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, + enum amdgpu_gfx_partition mode) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int num_xcc, num_xccs_per_xcp; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (mode) { + case AMDGPU_SPX_PARTITION_MODE: + return adev->gmc.num_mem_partitions == 1 && num_xcc > 0; + case AMDGPU_DPX_PARTITION_MODE: + return adev->gmc.num_mem_partitions <= 2 && (num_xcc % 4) == 0; + case AMDGPU_TPX_PARTITION_MODE: + return (adev->gmc.num_mem_partitions == 1 || + adev->gmc.num_mem_partitions == 3) && + ((num_xcc % 3) == 0); + case AMDGPU_QPX_PARTITION_MODE: + num_xccs_per_xcp = num_xcc / 4; + return (adev->gmc.num_mem_partitions == 1 || + adev->gmc.num_mem_partitions == 4) && + (num_xccs_per_xcp >= 2); + case AMDGPU_CPX_PARTITION_MODE: + /* (num_xcc > 1) because 1 XCC is considered SPX, not CPX. + * (num_xcc % adev->gmc.num_mem_partitions) == 0 because + * num_compute_partitions can't be less than num_mem_partitions + */ + return ((num_xcc > 1) && + (num_xcc % adev->gmc.num_mem_partitions) == 0); + default: + return false; + } + + return false; +} + +static void __soc_v1_0_update_available_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) +{ + int mode; + + xcp_mgr->avail_xcp_modes = 0; + + for_each_inst(mode, xcp_mgr->supp_xcp_modes) { + if (__soc_v1_0_is_valid_mode(xcp_mgr, mode)) + xcp_mgr->avail_xcp_modes |= BIT(mode); + } +} + +static int soc_v1_0_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, + int mode, int *num_xcps) +{ + int num_xcc_per_xcp, num_xcc, ret; + struct amdgpu_device *adev; + u32 flags = 0; + + adev = xcp_mgr->adev; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) { + mode = __soc_v1_0_get_auto_mode(xcp_mgr); + if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) { + dev_err(adev->dev, + "Invalid config, no compatible compute partition mode found, available memory partitions: %d", + adev->gmc.num_mem_partitions); + return -EINVAL; + } + } else if (!__soc_v1_0_is_valid_mode(xcp_mgr, mode)) { + dev_err(adev->dev, + "Invalid compute partition mode requested, requested: %s, available memory partitions: %d", + amdgpu_gfx_compute_mode_desc(mode), adev->gmc.num_mem_partitions); + return -EINVAL; + } + + if (adev->kfd.init_complete && !amdgpu_in_reset(adev)) + flags |= AMDGPU_XCP_OPS_KFD; + + if (flags & AMDGPU_XCP_OPS_KFD) { + ret = amdgpu_amdkfd_check_and_lock_kfd(adev); + if (ret) + goto out; + } + + ret = amdgpu_xcp_pre_partition_switch(xcp_mgr, flags); + if (ret) + goto unlock; + + num_xcc_per_xcp = __soc_v1_0_get_xcc_per_xcp(xcp_mgr, mode); + if (adev->gfx.imu.funcs && + adev->gfx.imu.funcs->switch_compute_partition) { + ret = adev->gfx.imu.funcs->switch_compute_partition(xcp_mgr->adev, num_xcc_per_xcp, mode); + if (ret) + goto out; + } + if (adev->gfx.imu.funcs && + adev->gfx.imu.funcs->init_mcm_addr_lut && + amdgpu_emu_mode) + adev->gfx.imu.funcs->init_mcm_addr_lut(adev); + + /* Init info about new xcps */ + *num_xcps = num_xcc / num_xcc_per_xcp; + amdgpu_xcp_init(xcp_mgr, *num_xcps, mode); + + ret = amdgpu_xcp_post_partition_switch(xcp_mgr, flags); + if (!ret) + __soc_v1_0_update_available_partition_mode(xcp_mgr); +unlock: + if (flags & AMDGPU_XCP_OPS_KFD) + amdgpu_amdkfd_unlock_kfd(adev); +out: + return ret; +} + +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV +static int __soc_v1_0_get_xcp_mem_id(struct amdgpu_device *adev, + int xcc_id, uint8_t *mem_id) +{ + /* memory/spatial modes validation check is already done */ + *mem_id = xcc_id / adev->gfx.num_xcc_per_xcp; + *mem_id /= adev->xcp_mgr->num_xcp_per_mem_partition; + + return 0; +} + +static int soc_v1_0_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr, + struct amdgpu_xcp *xcp, uint8_t *mem_id) +{ + struct amdgpu_numa_info numa_info; + struct amdgpu_device *adev; + uint32_t xcc_mask; + int r, i, xcc_id; + + adev = xcp_mgr->adev; + /* TODO: BIOS is not returning the right info now + * Check on this later + */ + /* + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + */ + if (adev->gmc.num_mem_partitions == 1) { + /* Only one range */ + *mem_id = 0; + return 0; + } + + r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &xcc_mask); + if (r || !xcc_mask) + return -EINVAL; + + xcc_id = ffs(xcc_mask) - 1; + if (!adev->gmc.is_app_apu) + return __soc_v1_0_get_xcp_mem_id(adev, xcc_id, mem_id); + + r = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info); + + if (r) + return r; + + r = -EINVAL; + for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { + if (adev->gmc.mem_partitions[i].numa.node == numa_info.nid) { + *mem_id = i; + r = 0; + break; + } + } + + return r; +} +#endif + +static int soc_v1_0_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id, + enum AMDGPU_XCP_IP_BLOCK ip_id, + struct amdgpu_xcp_ip *ip) +{ + if (!ip) + return -EINVAL; + + return __soc_v1_0_get_xcp_ip_info(xcp_mgr, xcp_id, ip_id, ip); +} + +struct amdgpu_xcp_mgr_funcs soc_v1_0_xcp_funcs = { + .switch_partition_mode = &soc_v1_0_switch_partition_mode, + .query_partition_mode = &soc_v1_0_query_partition_mode, + .get_ip_details = &soc_v1_0_get_xcp_ip_details, + .get_xcp_res_info = &soc_v1_0_get_xcp_res_info, +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV + .get_xcp_mem_id = &soc_v1_0_get_xcp_mem_id, +#endif +}; + +static int soc_v1_0_xcp_mgr_init(struct amdgpu_device *adev) +{ + int ret; + + if (amdgpu_sriov_vf(adev)) + soc_v1_0_xcp_funcs.switch_partition_mode = NULL; + + ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, + 1, &soc_v1_0_xcp_funcs); + if (ret) + return ret; + + amdgpu_xcp_update_supported_modes(adev->xcp_mgr); + /* TODO: Default memory node affinity init */ + + return ret; +} + +int soc_v1_0_init_soc_config(struct amdgpu_device *adev) +{ + int ret, i; + int xcc_inst_per_aid = 4; + uint16_t xcc_mask; + + xcc_mask = adev->gfx.xcc_mask; + adev->aid_mask = 0; + for (i = 0; xcc_mask; xcc_mask >>= xcc_inst_per_aid, i++) { + if (xcc_mask & ((1U << xcc_inst_per_aid) - 1)) + adev->aid_mask |= (1 << i); + } + + adev->sdma.num_inst_per_xcc = 2; + adev->sdma.num_instances = + NUM_XCC(adev->gfx.xcc_mask) * adev->sdma.num_inst_per_xcc; + adev->sdma.sdma_mask = + GENMASK(adev->sdma.num_instances - 1, 0); + + ret = soc_v1_0_xcp_mgr_init(adev); + if (ret) + return ret; + + amdgpu_ip_map_init(adev); + + return 0; +} + +bool soc_v1_0_normalize_xcc_reg_range(uint32_t reg) +{ + if (((reg >= XCC_REG_RANGE_0_LOW) && (reg < XCC_REG_RANGE_0_HIGH)) || + ((reg >= XCC_REG_RANGE_1_LOW) && (reg < XCC_REG_RANGE_1_HIGH))) + return true; + else + return false; +} + +uint32_t soc_v1_0_normalize_xcc_reg_offset(uint32_t reg) +{ + uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg); + + /* If it is an XCC reg, normalize the reg to keep + * lower 16 bits in local xcc */ + + if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) + return normalized_reg; + else + return reg; +} diff --git a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h new file mode 100644 index 000000000000..146996101aa0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h @@ -0,0 +1,37 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SOC_V1_0_H__ +#define __SOC_V1_0_H__ + +extern const struct amdgpu_ip_block_version soc_v1_0_common_ip_block; + +void soc_v1_0_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, + u32 queue, u32 vmid, + int xcc_id); +int soc_v1_0_init_soc_config(struct amdgpu_device *adev); +bool soc_v1_0_normalize_xcc_reg_range(uint32_t reg); +uint32_t soc_v1_0_normalize_xcc_reg_offset(uint32_t reg); +u64 soc_v1_0_encode_ext_smn_addressing(int ext_id); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 8a3f326474e5..f6178da910d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -155,6 +155,7 @@ struct ta_ras_init_flags { uint8_t channel_dis_num; uint8_t nps_mode; uint32_t active_umc_mask; + uint8_t vram_type; }; struct ta_ras_mca_addr { diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index f17d297b594b..f2dfacb952b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -267,7 +267,8 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, if ((adev->asic_type == CHIP_ARCTURUS) && amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) - DRM_WARN("Fail to disable DF-Cstate.\n"); + drm_warn(adev_to_drm(adev), + "Fail to disable DF-Cstate.\n"); LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { umc_reg_offset = get_umc_6_reg_offset(adev, @@ -284,7 +285,7 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, if ((adev->asic_type == CHIP_ARCTURUS) && amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) - DRM_WARN("Fail to enable DF-Cstate\n"); + drm_warn(adev_to_drm(adev), "Fail to enable DF-Cstate\n"); if (rsmu_umc_index_state) umc_v6_1_enable_umc_index_mode(adev); @@ -366,7 +367,7 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, if ((adev->asic_type == CHIP_ARCTURUS) && amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) - DRM_WARN("Fail to disable DF-Cstate.\n"); + drm_warn(adev_to_drm(adev), "Fail to disable DF-Cstate.\n"); LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { umc_reg_offset = get_umc_6_reg_offset(adev, @@ -382,7 +383,7 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, if ((adev->asic_type == CHIP_ARCTURUS) && amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) - DRM_WARN("Fail to enable DF-Cstate\n"); + drm_warn(adev_to_drm(adev), "Fail to enable DF-Cstate\n"); if (rsmu_umc_index_state) umc_v6_1_enable_umc_index_mode(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 2e79a3afc774..fea576a7f397 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -660,7 +660,7 @@ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block) r = uvd_v3_1_fw_validate(adev); if (r) { - DRM_ERROR("amdgpu: UVD Firmware validate fail (%d).\n", r); + drm_err(adev_to_drm(adev), "UVD Firmware validate fail (%d).\n", r); return r; } @@ -668,13 +668,13 @@ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block) r = amdgpu_ring_test_helper(ring); if (r) { - DRM_ERROR("amdgpu: UVD ring test fail (%d).\n", r); + drm_err(adev_to_drm(adev), "UVD ring test failed (%d).\n", r); goto done; } r = amdgpu_ring_alloc(ring, 10); if (r) { - DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); + drm_err(adev_to_drm(adev), "ring alloc failed (%d).\n", r); goto done; } @@ -701,7 +701,7 @@ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block) done: if (!r) - DRM_INFO("UVD initialized successfully.\n"); + drm_info(adev_to_drm(adev), "UVD initialized successfully.\n"); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 4b96fd583772..73ce3d211ed6 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -167,7 +167,7 @@ static int uvd_v4_2_hw_init(struct amdgpu_ip_block *ip_block) r = amdgpu_ring_alloc(ring, 10); if (r) { - DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); + drm_err(adev_to_drm(adev), "ring alloc failed (%d).\n", r); goto done; } @@ -194,7 +194,7 @@ static int uvd_v4_2_hw_init(struct amdgpu_ip_block *ip_block) done: if (!r) - DRM_INFO("UVD initialized successfully.\n"); + drm_info(adev_to_drm(adev), "UVD initialized successfully.\n"); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 71409ad8b7ed..454f109cbb2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -164,7 +164,7 @@ static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block) r = amdgpu_ring_alloc(ring, 10); if (r) { - DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); + drm_err(adev_to_drm(adev), "ring alloc failed (%d).\n", r); goto done; } @@ -191,7 +191,7 @@ static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block) done: if (!r) - DRM_INFO("UVD initialized successfully.\n"); + drm_info(adev_to_drm(adev), "UVD initialized successfully.\n"); return r; @@ -846,7 +846,7 @@ static void uvd_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 if (RREG32_SMC(ixCURRENT_PG_STATUS) & CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) { - DRM_INFO("Cannot get clockgating state when UVD is powergated.\n"); + drm_info(adev_to_drm(adev), "Cannot get clockgating state when UVD is powergated.\n"); goto out; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index ceb94bbb03a4..ecd7ead7a60b 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -408,7 +408,7 @@ static int uvd_v6_0_sw_init(struct amdgpu_ip_block *ip_block) adev->uvd.inst->irq.num_types = 1; adev->uvd.num_enc_rings = 0; - DRM_INFO("UVD ENC is disabled\n"); + drm_info(adev_to_drm(adev), "UVD ENC is disabled\n"); } ring = &adev->uvd.inst->ring; @@ -478,7 +478,7 @@ static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block) r = amdgpu_ring_alloc(ring, 10); if (r) { - DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); + drm_err(adev_to_drm(adev), "ring alloc failed (%d).\n", r); goto done; } @@ -515,9 +515,9 @@ static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block) done: if (!r) { if (uvd_v6_0_enc_support(adev)) - DRM_INFO("UVD and UVD ENC initialized successfully.\n"); + drm_info(adev_to_drm(adev), "UVD and UVD ENC initialized successfully.\n"); else - DRM_INFO("UVD initialized successfully.\n"); + drm_info(adev_to_drm(adev), "UVD initialized successfully.\n"); } return r; @@ -1513,7 +1513,7 @@ static void uvd_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 data = RREG32_SMC(ixCURRENT_PG_STATUS); if (data & CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) { - DRM_INFO("Cannot get clockgating state when UVD is powergated.\n"); + drm_info(adev_to_drm(adev), "Cannot get clockgating state when UVD is powergated.\n"); goto out; } @@ -1633,10 +1633,10 @@ static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) { if (adev->asic_type >= CHIP_POLARIS10) { adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_vm_funcs; - DRM_INFO("UVD is enabled in VM mode\n"); + drm_info(adev_to_drm(adev), "UVD is enabled in VM mode\n"); } else { adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_phys_funcs; - DRM_INFO("UVD is enabled in physical mode\n"); + drm_info(adev_to_drm(adev), "UVD is enabled in physical mode\n"); } } @@ -1647,7 +1647,7 @@ static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->uvd.num_enc_rings; ++i) adev->uvd.inst->ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs; - DRM_INFO("UVD ENC is enabled in VM mode\n"); + drm_info(adev_to_drm(adev), "UVD ENC is enabled in VM mode\n"); } static const struct amdgpu_irq_src_funcs uvd_v6_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 1f8866f3f63c..df2c83348315 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -438,7 +438,7 @@ static int uvd_v7_0_sw_init(struct amdgpu_ip_block *ip_block) adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); } - DRM_INFO("PSP loading UVD firmware\n"); + drm_info(adev_to_drm(adev), "PSP loading UVD firmware\n"); } for (j = 0; j < adev->uvd.num_uvd_inst; j++) { @@ -542,7 +542,7 @@ static int uvd_v7_0_hw_init(struct amdgpu_ip_block *ip_block) r = amdgpu_ring_alloc(ring, 10); if (r) { - DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r); + drm_err(adev_to_drm(adev), "ring alloc failed (%d).\n", r); goto done; } @@ -582,7 +582,7 @@ static int uvd_v7_0_hw_init(struct amdgpu_ip_block *ip_block) } done: if (!r) - DRM_INFO("UVD and UVD ENC initialized successfully.\n"); + drm_info(adev_to_drm(adev), "UVD and UVD ENC initialized successfully.\n"); return r; } @@ -1606,7 +1606,7 @@ static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev) continue; adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs; adev->uvd.inst[i].ring.me = i; - DRM_INFO("UVD(%d) is enabled in VM mode\n", i); + drm_info(adev_to_drm(adev), "UVD(%d) is enabled in VM mode\n", i); } } @@ -1622,7 +1622,7 @@ static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev) adev->uvd.inst[j].ring_enc[i].me = j; } - DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j); + drm_info(adev_to_drm(adev), "UVD(%d) ENC is enabled in VM mode\n", j); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 8ea8a6193492..db149eda6204 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -280,7 +280,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev) if (vce_v2_0_lmi_clean(adev)) { - DRM_INFO("VCE is not idle \n"); + drm_info(adev_to_drm(adev), "VCE is not idle\n"); return 0; } @@ -289,7 +289,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev) return -EINVAL; if (vce_v2_0_wait_for_idle(ip_block)) { - DRM_INFO("VCE is busy, Can't set clock gating"); + drm_info(adev_to_drm(adev), "VCE is busy, Can't set clock gating"); return 0; } @@ -481,7 +481,7 @@ static int vce_v2_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } - DRM_INFO("VCE initialized successfully.\n"); + drm_info(adev_to_drm(adev), "VCE initialized successfully.\n"); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 719e9643c43d..03d79e464f04 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -485,7 +485,7 @@ static int vce_v3_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } - DRM_INFO("VCE initialized successfully.\n"); + drm_info(adev_to_drm(adev), "VCE initialized successfully.\n"); return 0; } @@ -846,7 +846,7 @@ static void vce_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 data = RREG32_SMC(ixCURRENT_PG_STATUS); if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) { - DRM_INFO("Cannot get clockgating state when VCE is powergated.\n"); + drm_info(adev_to_drm(adev), "Cannot get clockgating state when VCE is powergated.\n"); goto out; } @@ -978,13 +978,13 @@ static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev) adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs; adev->vce.ring[i].me = i; } - DRM_INFO("VCE enabled in VM mode\n"); + drm_info(adev_to_drm(adev), "VCE enabled in VM mode\n"); } else { for (i = 0; i < adev->vce.num_rings; i++) { adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs; adev->vce.ring[i].me = i; } - DRM_INFO("VCE enabled in physical mode\n"); + drm_info(adev_to_drm(adev), "VCE enabled in physical mode\n"); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 2d64002bed61..ee445d8abe47 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -460,7 +460,7 @@ static int vce_v4_0_sw_init(struct amdgpu_ip_block *ip_block) adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - DRM_INFO("PSP loading VCE firmware\n"); + drm_info(adev_to_drm(adev), "PSP loading VCE firmware\n"); } else { r = amdgpu_vce_resume(adev); if (r) @@ -536,7 +536,7 @@ static int vce_v4_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } - DRM_INFO("VCE initialized successfully.\n"); + drm_info(adev_to_drm(adev), "VCE initialized successfully.\n"); return 0; } @@ -864,7 +864,7 @@ static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs; adev->vce.ring[i].me = i; } - DRM_INFO("VCE enabled in VM mode\n"); + drm_info(adev_to_drm(adev), "VCE enabled in VM mode\n"); } static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index a316797875a8..e9d790914761 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -2053,6 +2053,7 @@ static int vcn_v1_0_validate_bo(struct amdgpu_cs_parser *parser, { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct amdgpu_device *adev = parser->adev; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 8897dcc9c1a0..e35fae9cdaf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -1964,7 +1964,8 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) struct mmsch_v2_0_cmd_end end = { {0} }; struct mmsch_v2_0_init_header *header; uint32_t *init_table = adev->virt.mm_table.cpu_addr; - uint8_t i = 0; + + /* This path only programs VCN instance 0. */ header = (struct mmsch_v2_0_init_header *)init_table; direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; @@ -1983,93 +1984,93 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), + SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0xFFFFFFFF, 0x00000004); /* mc resume*/ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi); offset = 0; } else { MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr)); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr)); offset = size; } MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), + SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), + SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset)); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset)); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), + SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), + SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), + SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2), + SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE); for (r = 0; r < adev->vcn.inst[0].num_enc_rings; ++r) { ring = &adev->vcn.inst->ring_enc[r]; ring->wptr = 0; MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), + SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), lower_32_bits(ring->gpu_addr)); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), + SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), + SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); } ring = &adev->vcn.inst->ring_dec; ring->wptr = 0; MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), lower_32_bits(ring->gpu_addr)); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), upper_32_bits(ring->gpu_addr)); /* force RBC into idle state */ @@ -2080,7 +2081,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); MMSCH_V2_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp); + SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); /* add end packet */ tmp = sizeof(struct mmsch_v2_0_cmd_end); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index d9cf8f0feeb3..02d5c5af65f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1907,6 +1907,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_device *adev = p->adev; struct amdgpu_bo_va_mapping *map; uint32_t *msg, num_buffers; struct amdgpu_bo *bo; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 3ae666522d57..d17219be50f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1824,6 +1824,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_device *adev = p->adev; struct amdgpu_bo_va_mapping *map; uint32_t *msg, num_buffers; struct amdgpu_bo *bo; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index a611a7345125..6a574b6c8e63 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1424,10 +1424,6 @@ static bool vi_need_reset_on_init(struct amdgpu_device *adev) return false; } -static void vi_pre_asic_init(struct amdgpu_device *adev) -{ -} - static const struct amdgpu_asic_funcs vi_asic_funcs = { .read_disabled_bios = &vi_read_disabled_bios, @@ -1447,7 +1443,6 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .need_reset_on_init = &vi_need_reset_on_init, .get_pcie_replay_count = &vi_get_pcie_replay_count, .supports_baco = &vi_asic_supports_baco, - .pre_asic_init = &vi_pre_asic_init, .query_video_codecs = &vi_query_video_codecs, }; |
