summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_queue.c 63
1 files changed, 54 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 80c4fa2b0975..56c97189e7f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -392,12 +392,20 @@ int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
return 0;
}
-#define SGPR_SIZE_PER_CU 0x4000
-#define LDS_SIZE_PER_CU 0x10000
-#define HWREG_SIZE_PER_CU 0x1000
#define DEBUGGER_BYTES_ALIGN 64
#define DEBUGGER_BYTES_PER_WAVE 32
+/*
+ * SGPR size per CU for the given GFX version.
+ *
+ * Returns 0x8000 for 120500 and 120501 (GFX1250/GFX1251) and 0x4000 for
+ * every other version.
+ */
+static u32 kfd_get_sgpr_size_per_cu(u32 gfxv)
+{
+	if (gfxv == 120500 || gfxv == 120501)
+		return 0x8000;
+
+	return 0x4000;
+}
+
static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
{
u32 vgpr_size = 0x40000;
@@ -413,14 +421,53 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
gfxv == 120000 || /* GFX_VERSION_GFX1200 */
gfxv == 120001) /* GFX_VERSION_GFX1201 */
vgpr_size = 0x60000;
+ else if (gfxv == 120500 || /* GFX_VERSION_GFX1250 */
+ gfxv == 120501) /* GFX_VERSION_GFX1251 */
+ vgpr_size = 0x80000;
return vgpr_size;
}
+/*
+ * HWREG size per CU for the given GFX version.
+ *
+ * 120500 and 120501 (GFX1250/GFX1251) use 0x8000; all other versions
+ * use 0x1000.
+ */
+static u32 kfd_get_hwreg_size_per_cu(u32 gfxv)
+{
+	return (gfxv == 120500 || gfxv == 120501) ? 0x8000 : 0x1000;
+}
+
+/*
+ * LDS size per CU for the given GFX version.
+ *
+ * For 90500, 120500 and 120501 the size is taken from
+ * props->lds_size_in_kb; every other version uses the fixed value
+ * 0x10000. props is only dereferenced for those three versions.
+ */
+static u32 kfd_get_lds_size_per_cu(u32 gfxv, struct kfd_node_properties *props)
+{
+	switch (gfxv) {
+	case 90500:
+	case 120500:
+	case 120501:
+		/* lds_size_in_kb is in KiB; shift by 10 to get bytes. */
+		return props->lds_size_in_kb << 10;
+	default:
+		return 0x10000;
+	}
+}
+
+/*
+ * Wave count for the given GFX version and CU count.
+ *
+ *   gfxv <  100100            min(cu_num * 40,
+ *                                 array_count / simd_arrays_per_engine * 512)
+ *   100100 <= gfxv < 120500   cu_num * 32
+ *   120500 <= gfxv <= 120501  cu_num * 64
+ *   gfxv >  120501            0 (not handled here)
+ *
+ * A result of 0 triggers WARN_ON() and is still returned to the caller.
+ */
+static u32 get_num_waves(struct kfd_node_properties *props, u32 gfxv, u32 cu_num)
+{
+	u32 waves = 0;
+
+	if (gfxv >= 120500) {
+		/* Versions above 120501 are left at 0 so the WARN_ON below fires. */
+		if (gfxv <= 120501)
+			waves = cu_num * 64;
+	} else if (gfxv >= 100100) {
+		waves = cu_num * 32;
+	} else {
+		waves = min(cu_num * 40,
+			    props->array_count / props->simd_arrays_per_engine * 512);
+	}
+
+	WARN_ON(waves == 0);
+
+	return waves;
+}
+
/*
 * WG context data size per CU: the sum of the per-CU VGPR, SGPR, LDS and
 * HWREG sizes for gfxv. props is only consulted for the LDS term.
 */
#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \
- (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
- (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\
- HWREG_SIZE_PER_CU)
+ (kfd_get_vgpr_size_per_cu(gfxv) + kfd_get_sgpr_size_per_cu(gfxv) +\
+ kfd_get_lds_size_per_cu(gfxv, props) + kfd_get_hwreg_size_per_cu(gfxv))
/*
 * Control stack bytes per wave: 12 when gfxv is at or above 100100
 * (GFX_VERSION_NAVI10), 8 for anything older.
 */
#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \
((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/
@@ -440,9 +487,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
return;
cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
- wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */
- min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
- : cu_num * 32;
+ wave_num = get_num_waves(props, gfxv, cu_num);
wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;