Diffstat (limited to 'drivers/gpu/drm/msm/adreno')
-rw-r--r--   drivers/gpu/drm/msm/adreno/a5xx.xml.h       |   2
-rw-r--r--   drivers/gpu/drm/msm/adreno/a5xx_gpu.c       | 195
-rw-r--r--   drivers/gpu/drm/msm/adreno/a5xx_power.c     |   4
-rw-r--r--   drivers/gpu/drm/msm/adreno/a6xx_gmu.c       | 105
-rw-r--r--   drivers/gpu/drm/msm/adreno/a6xx_gmu.h       |  49
-rw-r--r--   drivers/gpu/drm/msm/adreno/a6xx_gpu.c       | 139
-rw-r--r--   drivers/gpu/drm/msm/adreno/a6xx_gpu.h       |   2
-rw-r--r--   drivers/gpu/drm/msm/adreno/adreno_device.c  |  54
-rw-r--r--   drivers/gpu/drm/msm/adreno/adreno_gpu.c     |  23
-rw-r--r--   drivers/gpu/drm/msm/adreno/adreno_gpu.h     |  22
10 files changed, 467 insertions, 128 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a5xx.xml.h b/drivers/gpu/drm/msm/adreno/a5xx.xml.h
index 346cc6ff3a36..7b9fcfe95c04 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx.xml.h
@@ -2367,6 +2367,8 @@ static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val)
 
 #define REG_A5XX_UCHE_ADDR_MODE_CNTL			0x00000e80
 
+#define REG_A5XX_UCHE_MODE_CNTL				0x00000e81
+
 #define REG_A5XX_UCHE_SVM_CNTL				0x00000e82
 
 #define REG_A5XX_UCHE_WRITE_THRU_BASE_LO		0x00000e87
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index a5af223eaf50..7e553d3efeb2 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -222,7 +222,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 	a5xx_preempt_trigger(gpu);
 }
 
-static const struct {
+static const struct adreno_five_hwcg_regs {
 	u32 offset;
 	u32 value;
 } a5xx_hwcg[] = {
@@ -318,16 +318,124 @@ static const struct {
 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
+}, a50x_hwcg[] = {
+	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
+	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+}, a512_hwcg[] = {
+	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
+	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
+	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
+	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
+	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
+	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
+	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
+	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
+	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
+	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
+	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
+	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
+	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
+	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
+	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
+	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 };
 
 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-	unsigned int i;
+	const struct adreno_five_hwcg_regs *regs;
+	unsigned int i, sz;
+
+	if (adreno_is_a508(adreno_gpu)) {
+		regs = a50x_hwcg;
+		sz = ARRAY_SIZE(a50x_hwcg);
+	} else if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) {
+		regs = a512_hwcg;
+		sz = ARRAY_SIZE(a512_hwcg);
+	} else {
+		regs = a5xx_hwcg;
+		sz = ARRAY_SIZE(a5xx_hwcg);
+	}
 
-	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
-		gpu_write(gpu, a5xx_hwcg[i].offset,
-			state ? a5xx_hwcg[i].value : 0);
+	for (i = 0; i < sz; i++)
+		gpu_write(gpu, regs[i].offset,
+			state ? regs[i].value : 0);
 
 	if (adreno_is_a540(adreno_gpu)) {
 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ?
 			0x00000770 : 0);
@@ -538,11 +646,13 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+	u32 regbit;
 	int ret;
 
 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
 
-	if (adreno_is_a540(adreno_gpu))
+	if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
+	    adreno_is_a540(adreno_gpu))
 		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
 
 	/* Make all blocks contribute to the GPU BUSY perf counter */
@@ -604,29 +714,48 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 		0x00100000 + adreno_gpu->gmem - 1);
 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
 
-	if (adreno_is_a510(adreno_gpu)) {
+	if (adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu)) {
 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
-		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
+		if (adreno_is_a508(adreno_gpu))
+			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
+		else
+			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
-		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
-			  (0x200 << 11 | 0x200 << 22));
 	} else {
 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
 		if (adreno_is_a530(adreno_gpu))
 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
-		if (adreno_is_a540(adreno_gpu))
+		else
 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
+	}
+
+	if (adreno_is_a508(adreno_gpu))
+		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+			  (0x100 << 11 | 0x100 << 22));
+	else if (adreno_is_a509(adreno_gpu) || adreno_is_a510(adreno_gpu) ||
+		 adreno_is_a512(adreno_gpu))
+		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+			  (0x200 << 11 | 0x200 << 22));
+	else
 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
 			  (0x400 << 11 | 0x300 << 22));
-	}
 
 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
 
-	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
+	/*
+	 * Disable the RB sampler datapath DP2 clock gating optimization
+	 * for 1-SP GPUs, as it is enabled by default.
+	 */
+	if (adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
+	    adreno_is_a512(adreno_gpu))
+		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, 0, (1 << 9));
+
+	/* Disable UCHE global filter as SP can invalidate/flush independently */
+	gpu_write(gpu, REG_A5XX_UCHE_MODE_CNTL, BIT(29));
 
 	/* Enable USE_RETENTION_FLOPS */
 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
@@ -653,10 +782,20 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 
 	/* Set the highest bank bit */
-	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
-	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
 	if (adreno_is_a540(adreno_gpu))
-		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
+		regbit = 2;
+	else
+		regbit = 1;
+
+	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7);
+	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1);
+
+	if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
+	    adreno_is_a540(adreno_gpu))
+		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit);
+
+	/* Disable All flat shading optimization (ALLFLATOPTDIS) */
+	gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));
 
 	/* Protect registers from the CP */
 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
@@ -688,12 +827,14 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 	/* VPC */
 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
-	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
+	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 16));
 
 	/* UCHE */
 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
 
-	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
+	if (adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
+	    adreno_is_a510(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
+	    adreno_is_a530(adreno_gpu))
 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
 			ADRENO_PROTECT_RW(0x10000, 0x8000));
@@ -735,7 +876,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 	if (ret)
 		return ret;
 
-	if (!adreno_is_a510(adreno_gpu))
+	if (!(adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
+	      adreno_is_a510(adreno_gpu) || adreno_is_a512(adreno_gpu)))
 		a5xx_gpmu_ucode_init(gpu);
 
 	ret = a5xx_ucode_init(gpu);
@@ -1168,7 +1310,8 @@ static int a5xx_pm_resume(struct msm_gpu *gpu)
 	if (ret)
 		return ret;
 
-	if (adreno_is_a510(adreno_gpu)) {
+	/* Adreno 508, 509, 510, 512 needs manual RBBM sus/res control */
+	if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))) {
 		/* Halt the sp_input_clk at HM level */
 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
 		a5xx_set_hwcg(gpu, true);
@@ -1210,8 +1353,8 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu)
 	u32 mask = 0xf;
 	int i, ret;
 
-	/* A510 has 3 XIN ports in VBIF */
-	if (adreno_is_a510(adreno_gpu))
+	/* A508, A510 have 3 XIN ports in VBIF */
+	if (adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu))
 		mask = 0x7;
 
 	/* Clear the VBIF pipe before shutting down */
@@ -1223,10 +1366,12 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu)
 	/*
 	 * Reset the VBIF before power collapse to avoid issue with FIFO
-	 * entries
+	 * entries on Adreno A510 and A530 (the others will tend to lock up)
 	 */
-	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
-	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
+	if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
+		gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
+		gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
+	}
 
 	ret = msm_gpu_pm_suspend(gpu);
 	if (ret)
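To make the highest-bank-bit change above concrete: the new regbit value (2 on A540, 1 everywhere else in this series) fans out into three registers at different shift positions. The following is a standalone C sketch, not driver code; the revision number is an arbitrary example and the register names are simply taken from the hunk above:

#include <stdio.h>

int main(void)
{
	unsigned int revn = 512;	/* hypothetical: an Adreno 512 */
	unsigned int regbit = (revn == 540) ? 2 : 1;

	/* same value, three destinations, as in a5xx_hw_init() */
	printf("TPL1_MODE_CNTL      <- 0x%08x\n", regbit << 7);
	printf("RB_MODE_CNTL        <- 0x%08x\n", regbit << 1);
	printf("UCHE_DBG_ECO_CNTL_2 <- 0x%08x\n", regbit);
	return 0;
}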
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c
index f176a6f3eff6..5ccc9da455a1 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_power.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c
@@ -298,7 +298,7 @@ int a5xx_power_init(struct msm_gpu *gpu)
 	int ret;
 
 	/* Not all A5xx chips have a GPMU */
-	if (adreno_is_a510(adreno_gpu))
+	if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu)))
 		return 0;
 
 	/* Set up the limits management */
@@ -330,7 +330,7 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu)
 	unsigned int *data, *ptr, *cmds;
 	unsigned int cmds_size;
 
-	if (adreno_is_a510(adreno_gpu))
+	if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu)))
 		return;
 
 	if (a5xx_gpu->gpmu_bo)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 05e0ef58fe32..71c917f909af 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -245,37 +245,66 @@ static int a6xx_gmu_hfi_start(struct a6xx_gmu *gmu)
 	return ret;
 }
 
+struct a6xx_gmu_oob_bits {
+	int set, ack, set_new, ack_new;
+	const char *name;
+};
+
+/* These are the interrupt / ack bits for each OOB request that are set
+ * in a6xx_gmu_set_oob and a6xx_clear_oob
+ */
+static const struct a6xx_gmu_oob_bits a6xx_gmu_oob_bits[] = {
+	[GMU_OOB_GPU_SET] = {
+		.name = "GPU_SET",
+		.set = 16,
+		.ack = 24,
+		.set_new = 30,
+		.ack_new = 31,
+	},
+
+	[GMU_OOB_PERFCOUNTER_SET] = {
+		.name = "PERFCOUNTER",
+		.set = 17,
+		.ack = 25,
+		.set_new = 28,
+		.ack_new = 30,
+	},
+
+	[GMU_OOB_BOOT_SLUMBER] = {
+		.name = "BOOT_SLUMBER",
+		.set = 22,
+		.ack = 30,
+	},
+
+	[GMU_OOB_DCVS_SET] = {
+		.name = "GPU_DCVS",
+		.set = 23,
+		.ack = 31,
+	},
+};
+
 /* Trigger a OOB (out of band) request to the GMU */
 int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
 {
 	int ret;
 	u32 val;
 	int request, ack;
-	const char *name;
 
-	switch (state) {
-	case GMU_OOB_GPU_SET:
-		if (gmu->legacy) {
-			request = GMU_OOB_GPU_SET_REQUEST;
-			ack = GMU_OOB_GPU_SET_ACK;
-		} else {
-			request = GMU_OOB_GPU_SET_REQUEST_NEW;
-			ack = GMU_OOB_GPU_SET_ACK_NEW;
-		}
-		name = "GPU_SET";
-		break;
-	case GMU_OOB_BOOT_SLUMBER:
-		request = GMU_OOB_BOOT_SLUMBER_REQUEST;
-		ack = GMU_OOB_BOOT_SLUMBER_ACK;
-		name = "BOOT_SLUMBER";
-		break;
-	case GMU_OOB_DCVS_SET:
-		request = GMU_OOB_DCVS_REQUEST;
-		ack = GMU_OOB_DCVS_ACK;
-		name = "GPU_DCVS";
-		break;
-	default:
+	if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits))
 		return -EINVAL;
+
+	if (gmu->legacy) {
+		request = a6xx_gmu_oob_bits[state].set;
+		ack = a6xx_gmu_oob_bits[state].ack;
+	} else {
+		request = a6xx_gmu_oob_bits[state].set_new;
+		ack = a6xx_gmu_oob_bits[state].ack_new;
+		if (!request || !ack) {
+			DRM_DEV_ERROR(gmu->dev,
+				      "Invalid non-legacy GMU request %s\n",
+				      a6xx_gmu_oob_bits[state].name);
+			return -EINVAL;
+		}
 	}
 
 	/* Trigger the equested OOB operation */
@@ -288,7 +317,7 @@ int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
 	if (ret)
 		DRM_DEV_ERROR(gmu->dev,
 			"Timeout waiting for GMU OOB set %s: 0x%x\n",
-				name,
+				a6xx_gmu_oob_bits[state].name,
 				gmu_read(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO));
 
 	/* Clear the acknowledge interrupt */
@@ -300,27 +329,17 @@ int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
 /* Clear a pending OOB state in the GMU */
 void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
 {
-	if (!gmu->legacy) {
-		WARN_ON(state != GMU_OOB_GPU_SET);
-		gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
-			1 << GMU_OOB_GPU_SET_CLEAR_NEW);
+	int bit;
+
+	if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits))
 		return;
-	}
 
-	switch (state) {
-	case GMU_OOB_GPU_SET:
-		gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
-			1 << GMU_OOB_GPU_SET_CLEAR);
-		break;
-	case GMU_OOB_BOOT_SLUMBER:
-		gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
-			1 << GMU_OOB_BOOT_SLUMBER_CLEAR);
-		break;
-	case GMU_OOB_DCVS_SET:
-		gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
-			1 << GMU_OOB_DCVS_CLEAR);
-		break;
-	}
+	if (gmu->legacy)
+		bit = a6xx_gmu_oob_bits[state].ack;
+	else
+		bit = a6xx_gmu_oob_bits[state].ack_new;
+
+	gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 1 << bit);
 }
 
 /* Enable CPU control of SPTP power power collapse */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index c6d2bced8e5d..71dfa60070cc 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -153,44 +153,27 @@ static inline void gmu_write_rscc(struct a6xx_gmu *gmu, u32 offset, u32 value)
  */
 
 enum a6xx_gmu_oob_state {
+	/*
+	 * Let the GMU know that a boot or slumber operation has started. The value in
+	 * REG_A6XX_GMU_BOOT_SLUMBER_OPTION lets the GMU know which operation we are
+	 * doing
+	 */
 	GMU_OOB_BOOT_SLUMBER = 0,
+	/*
+	 * Let the GMU know to not turn off any GPU registers while the CPU is in a
+	 * critical section
+	 */
 	GMU_OOB_GPU_SET,
+	/*
+	 * Set a new power level for the GPU when the CPU is doing frequency scaling
+	 */
 	GMU_OOB_DCVS_SET,
+	/*
+	 * Used to keep the GPU on for CPU-side reads of performance counters.
+	 */
+	GMU_OOB_PERFCOUNTER_SET,
 };
 
-/* These are the interrupt / ack bits for each OOB request that are set
- * in a6xx_gmu_set_oob and a6xx_clear_oob
- */
-
-/*
- * Let the GMU know that a boot or slumber operation has started. The value in
- * REG_A6XX_GMU_BOOT_SLUMBER_OPTION lets the GMU know which operation we are
- * doing
- */
-#define GMU_OOB_BOOT_SLUMBER_REQUEST	22
-#define GMU_OOB_BOOT_SLUMBER_ACK	30
-#define GMU_OOB_BOOT_SLUMBER_CLEAR	30
-
-/*
- * Set a new power level for the GPU when the CPU is doing frequency scaling
- */
-#define GMU_OOB_DCVS_REQUEST	23
-#define GMU_OOB_DCVS_ACK	31
-#define GMU_OOB_DCVS_CLEAR	31
-
-/*
- * Let the GMU know to not turn off any GPU registers while the CPU is in a
- * critical section
- */
-#define GMU_OOB_GPU_SET_REQUEST	16
-#define GMU_OOB_GPU_SET_ACK	24
-#define GMU_OOB_GPU_SET_CLEAR	24
-
-#define GMU_OOB_GPU_SET_REQUEST_NEW	30
-#define GMU_OOB_GPU_SET_ACK_NEW	31
-#define GMU_OOB_GPU_SET_CLEAR_NEW	31
-
-
 void a6xx_hfi_init(struct a6xx_gmu *gmu);
 int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state);
 void a6xx_hfi_stop(struct a6xx_gmu *gmu);
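The OOB handshake the table above encodes is small enough to spell out: setting an OOB state writes 1 << set (or set_new on non-legacy firmware) into REG_A6XX_GMU_HOST2GMU_INTR_SET and polls for 1 << ack in REG_A6XX_GMU_GMU2HOST_INTR_INFO; clearing writes the ack bit back into HOST2GMU_INTR_SET. A standalone sketch with the MMIO replaced by plain variables (bit numbers copied from the GPU_SET legacy entry; the "GMU ack" is simulated):

#include <stdio.h>

static unsigned int host2gmu_intr_set;	/* stand-in for HOST2GMU_INTR_SET */
static unsigned int gmu2host_intr_info;	/* stand-in for GMU2HOST_INTR_INFO */

int main(void)
{
	const int set = 16, ack = 24;	/* GMU_OOB_GPU_SET, legacy bits */

	host2gmu_intr_set = 1u << set;		/* request: keep the GPU powered */
	gmu2host_intr_info |= 1u << ack;	/* pretend the GMU acknowledged */

	if (gmu2host_intr_info & (1u << ack))
		printf("GPU_SET acked, safe to touch GPU registers\n");

	host2gmu_intr_set = 1u << ack;		/* clear the OOB state when done */
	printf("HOST2GMU_INTR_SET = 0x%08x\n", host2gmu_intr_set);
	return 0;
}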
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 130661898546..ba8e9d3cf0fe 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -10,6 +10,7 @@
 
 #include <linux/bitfield.h>
 #include <linux/devfreq.h>
+#include <linux/nvmem-consumer.h>
 #include <linux/soc/qcom/llcc-qcom.h>
 
 #define GPU_PAS_ID 13
@@ -1117,7 +1118,7 @@ static void a6xx_llc_slices_init(struct platform_device *pdev,
 	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
 	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
 
-	if (IS_ERR(a6xx_gpu->llc_slice) && IS_ERR(a6xx_gpu->htw_llc_slice))
+	if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
 		a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
 }
 
@@ -1169,14 +1170,18 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+	static DEFINE_MUTEX(perfcounter_oob);
+
+	mutex_lock(&perfcounter_oob);
 
 	/* Force the GPU power on so we can read this register */
-	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
 
 	*value = gpu_read64(gpu, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
 		REG_A6XX_RBBM_PERFCTR_CP_0_HI);
 
-	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
+	mutex_unlock(&perfcounter_oob);
 
 	return 0;
 }
@@ -1208,6 +1213,10 @@ static void a6xx_destroy(struct msm_gpu *gpu)
 
 	a6xx_gmu_remove(a6xx_gpu);
 	adreno_gpu_cleanup(adreno_gpu);
+
+	if (a6xx_gpu->opp_table)
+		dev_pm_opp_put_supported_hw(a6xx_gpu->opp_table);
+
 	kfree(a6xx_gpu);
 }
 
@@ -1240,6 +1249,50 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
 }
 
 static struct msm_gem_address_space *
+a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+	struct iommu_domain *iommu;
+	struct msm_mmu *mmu;
+	struct msm_gem_address_space *aspace;
+	u64 start, size;
+
+	iommu = iommu_domain_alloc(&platform_bus_type);
+	if (!iommu)
+		return NULL;
+
+	/*
+	 * This allows GPU to set the bus attributes required to use system
+	 * cache on behalf of the iommu page table walker.
+	 */
+	if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
+		adreno_set_llc_attributes(iommu);
+
+	mmu = msm_iommu_new(&pdev->dev, iommu);
+	if (IS_ERR(mmu)) {
+		iommu_domain_free(iommu);
+		return ERR_CAST(mmu);
+	}
+
+	/*
+	 * Use the aperture start or SZ_16M, whichever is greater. This will
+	 * ensure that we align with the allocated pagetable range while still
+	 * allowing room in the lower 32 bits for GMEM and whatnot
+	 */
+	start = max_t(u64, SZ_16M, iommu->geometry.aperture_start);
+	size = iommu->geometry.aperture_end - start + 1;
+
+	aspace = msm_gem_address_space_create(mmu, "gpu",
+		start & GENMASK_ULL(48, 0), size);
+
+	if (IS_ERR(aspace) && !IS_ERR(mmu))
+		mmu->funcs->destroy(mmu);
+
+	return aspace;
+}
+
+static struct msm_gem_address_space *
 a6xx_create_private_address_space(struct msm_gpu *gpu)
 {
 	struct msm_mmu *mmu;
@@ -1264,6 +1317,78 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
 }
 
+static u32 a618_get_speed_bin(u32 fuse)
+{
+	if (fuse == 0)
+		return 0;
+	else if (fuse == 169)
+		return 1;
+	else if (fuse == 174)
+		return 2;
+
+	return UINT_MAX;
+}
+
+static u32 fuse_to_supp_hw(struct device *dev, u32 revn, u32 fuse)
+{
+	u32 val = UINT_MAX;
+
+	if (revn == 618)
+		val = a618_get_speed_bin(fuse);
+
+	if (val == UINT_MAX) {
+		DRM_DEV_ERROR(dev,
+			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware",
+			fuse);
+		return UINT_MAX;
+	}
+
+	return (1 << val);
+}
+
+static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu,
+		u32 revn)
+{
+	struct opp_table *opp_table;
+	struct nvmem_cell *cell;
+	u32 supp_hw = UINT_MAX;
+	void *buf;
+
+	cell = nvmem_cell_get(dev, "speed_bin");
+	/*
+	 * -ENOENT means that the platform doesn't support speedbin which is
+	 * fine
+	 */
+	if (PTR_ERR(cell) == -ENOENT)
+		return 0;
+	else if (IS_ERR(cell)) {
+		DRM_DEV_ERROR(dev,
+			"failed to read speed-bin. Some OPPs may not be supported by hardware");
+		goto done;
+	}
+
+	buf = nvmem_cell_read(cell, NULL);
+	if (IS_ERR(buf)) {
+		nvmem_cell_put(cell);
+		DRM_DEV_ERROR(dev,
+			"failed to read speed-bin. Some OPPs may not be supported by hardware");
+		goto done;
+	}
+
+	supp_hw = fuse_to_supp_hw(dev, revn, *((u32 *) buf));
+
+	kfree(buf);
+	nvmem_cell_put(cell);
+
+done:
+	opp_table = dev_pm_opp_set_supported_hw(dev, &supp_hw, 1);
+	if (IS_ERR(opp_table))
+		return PTR_ERR(opp_table);
+
+	a6xx_gpu->opp_table = opp_table;
+	return 0;
+}
+
 static const struct adreno_gpu_funcs funcs = {
 	.base = {
 		.get_param = adreno_get_param,
@@ -1285,7 +1410,7 @@ static const struct adreno_gpu_funcs funcs = {
 		.gpu_state_get = a6xx_gpu_state_get,
 		.gpu_state_put = a6xx_gpu_state_put,
 #endif
-		.create_address_space = adreno_iommu_create_address_space,
+		.create_address_space = a6xx_create_address_space,
 		.create_private_address_space = a6xx_create_private_address_space,
 		.get_rptr = a6xx_get_rptr,
 	},
@@ -1325,6 +1450,12 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 
 	a6xx_llc_slices_init(pdev, a6xx_gpu);
 
+	ret = a6xx_set_supported_hw(&pdev->dev, a6xx_gpu, info->revn);
+	if (ret) {
+		a6xx_destroy(&(a6xx_gpu->base.base));
+		return ERR_PTR(ret);
+	}
+
 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
 	if (ret) {
 		a6xx_destroy(&(a6xx_gpu->base.base));
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index e793d329e77b..ce0610c5256f 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -33,6 +33,8 @@ struct a6xx_gpu {
 	void *llc_slice;
 	void *htw_llc_slice;
 	bool have_mmu500;
+
+	struct opp_table *opp_table;
 };
 
 #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)
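As a worked example of the speed-bin plumbing above (the fuse-to-bin values are the ones in a618_get_speed_bin(); the fuse reading itself is a hypothetical input): fuse 169 maps to bin 1, so the mask handed to dev_pm_opp_set_supported_hw() is 0x2 and only OPP table entries whose opp-supported-hw property has bit 1 set stay usable. A standalone sketch:

#include <stdio.h>
#include <limits.h>

static unsigned int a618_get_speed_bin(unsigned int fuse)
{
	if (fuse == 0)
		return 0;
	if (fuse == 169)
		return 1;
	if (fuse == 174)
		return 2;
	return UINT_MAX;	/* unknown fuse value: no bin */
}

int main(void)
{
	unsigned int fuse = 169;	/* hypothetical nvmem "speed_bin" reading */
	unsigned int bin = a618_get_speed_bin(fuse);
	unsigned int supp_hw = (bin == UINT_MAX) ? UINT_MAX : (1u << bin);

	printf("fuse %u -> speed bin %u -> supported-hw mask 0x%x\n",
	       fuse, bin, supp_hw);
	return 0;
}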
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 12e75ba360f9..600d445fabe8 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -134,6 +134,41 @@ static const struct adreno_info gpulist[] = {
 		.inactive_period = DRM_MSM_INACTIVE_PERIOD,
 		.init  = a4xx_gpu_init,
 	}, {
+		.rev   = ADRENO_REV(5, 0, 8, ANY_ID),
+		.revn = 508,
+		.name = "A508",
+		.fw = {
+			[ADRENO_FW_PM4] = "a530_pm4.fw",
+			[ADRENO_FW_PFP] = "a530_pfp.fw",
+		},
+		.gmem = (SZ_128K + SZ_8K),
+		/*
+		 * Increase inactive period to 250 to avoid bouncing
+		 * the GDSC which appears to make it grumpy
+		 */
+		.inactive_period = 250,
+		.quirks = ADRENO_QUIRK_LMLOADKILL_DISABLE,
+		.init = a5xx_gpu_init,
+		.zapfw = "a508_zap.mdt",
+	}, {
+		.rev   = ADRENO_REV(5, 0, 9, ANY_ID),
+		.revn = 509,
+		.name = "A509",
+		.fw = {
+			[ADRENO_FW_PM4] = "a530_pm4.fw",
+			[ADRENO_FW_PFP] = "a530_pfp.fw",
+		},
+		.gmem = (SZ_256K + SZ_16K),
+		/*
+		 * Increase inactive period to 250 to avoid bouncing
+		 * the GDSC which appears to make it grumpy
+		 */
+		.inactive_period = 250,
+		.quirks = ADRENO_QUIRK_LMLOADKILL_DISABLE,
+		.init = a5xx_gpu_init,
+		/* Adreno 509 uses the same ZAP as 512 */
+		.zapfw = "a512_zap.mdt",
+	}, {
 		.rev   = ADRENO_REV(5, 1, 0, ANY_ID),
 		.revn = 510,
 		.name = "A510",
@@ -149,6 +184,23 @@ static const struct adreno_info gpulist[] = {
 		.inactive_period = 250,
 		.init = a5xx_gpu_init,
 	}, {
+		.rev   = ADRENO_REV(5, 1, 2, ANY_ID),
+		.revn = 512,
+		.name = "A512",
+		.fw = {
+			[ADRENO_FW_PM4] = "a530_pm4.fw",
+			[ADRENO_FW_PFP] = "a530_pfp.fw",
+		},
+		.gmem = (SZ_256K + SZ_16K),
+		/*
+		 * Increase inactive period to 250 to avoid bouncing
+		 * the GDSC which appears to make it grumpy
+		 */
+		.inactive_period = 250,
+		.quirks = ADRENO_QUIRK_LMLOADKILL_DISABLE,
+		.init = a5xx_gpu_init,
+		.zapfw = "a512_zap.mdt",
+	}, {
 		.rev = ADRENO_REV(5, 3, 0, 2),
 		.revn = 530,
 		.name = "A530",
@@ -168,7 +220,7 @@ static const struct adreno_info gpulist[] = {
 		.init = a5xx_gpu_init,
 		.zapfw = "a530_zap.mdt",
 	}, {
-		.rev = ADRENO_REV(5, 4, 0, 2),
+		.rev = ADRENO_REV(5, 4, 0, ANY_ID),
 		.revn = 540,
 		.name = "A540",
 		.fw = {
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index f09175698827..0f184c3dd9d9 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -186,11 +186,18 @@ int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid)
 	return zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw, pasid);
 }
 
+void adreno_set_llc_attributes(struct iommu_domain *iommu)
+{
+	struct io_pgtable_domain_attr pgtbl_cfg;
+
+	pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
+	iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg);
+}
+
 struct msm_gem_address_space *
 adreno_iommu_create_address_space(struct msm_gpu *gpu,
 		struct platform_device *pdev)
 {
-	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct iommu_domain *iommu;
 	struct msm_mmu *mmu;
 	struct msm_gem_address_space *aspace;
@@ -200,20 +207,6 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
 	if (!iommu)
 		return NULL;
 
-
-	if (adreno_is_a6xx(adreno_gpu)) {
-		struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
-		struct io_pgtable_domain_attr pgtbl_cfg;
-		/*
-		 * This allows GPU to set the bus attributes required to use system
-		 * cache on behalf of the iommu page table walker.
-		 */
-		if (!IS_ERR(a6xx_gpu->htw_llc_slice)) {
-			pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
-			iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg);
-		}
-	}
-
 	mmu = msm_iommu_new(&pdev->dev, iommu);
 	if (IS_ERR(mmu)) {
 		iommu_domain_free(iommu);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index b3d9a333591b..ccac275aa7a2 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -197,11 +197,26 @@ static inline int adreno_is_a430(struct adreno_gpu *gpu)
 	return gpu->revn == 430;
 }
 
+static inline int adreno_is_a508(struct adreno_gpu *gpu)
+{
+	return gpu->revn == 508;
+}
+
+static inline int adreno_is_a509(struct adreno_gpu *gpu)
+{
+	return gpu->revn == 509;
+}
+
 static inline int adreno_is_a510(struct adreno_gpu *gpu)
 {
 	return gpu->revn == 510;
 }
 
+static inline int adreno_is_a512(struct adreno_gpu *gpu)
+{
+	return gpu->revn == 512;
+}
+
 static inline int adreno_is_a530(struct adreno_gpu *gpu)
 {
 	return gpu->revn == 530;
@@ -212,11 +227,6 @@ static inline int adreno_is_a540(struct adreno_gpu *gpu)
 	return gpu->revn == 540;
 }
 
-static inline bool adreno_is_a6xx(struct adreno_gpu *gpu)
-{
-	return ((gpu->revn < 700 && gpu->revn > 599));
-}
-
 static inline int adreno_is_a618(struct adreno_gpu *gpu)
 {
 	return gpu->revn == 618;
@@ -278,6 +288,8 @@ struct msm_gem_address_space *
 adreno_iommu_create_address_space(struct msm_gpu *gpu,
 		struct platform_device *pdev);
 
+void adreno_set_llc_attributes(struct iommu_domain *iommu);
+
 /*
  * For a5xx and a6xx targets load the zap shader that is used to pull the GPU
  * out of secure mode
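One detail of a6xx_create_address_space() worth spelling out: the GPU VA range starts at the IOMMU aperture start or 16 MiB, whichever is greater, and the base passed to msm_gem_address_space_create() is clamped to 49 address bits. A standalone sketch of that arithmetic (the aperture bounds below are made-up example values, not real hardware geometry, and GENMASK_ULL is re-derived here rather than taken from kernel headers):

#include <stdio.h>
#include <stdint.h>

#define SZ_16M			(16ull << 20)
#define GENMASK_ULL(h, l)	(((~0ull) << (l)) & (~0ull >> (63 - (h))))

int main(void)
{
	uint64_t aperture_start = 0x1000;		/* hypothetical */
	uint64_t aperture_end = 0x0001ffffffffffffull;	/* hypothetical */

	/* start at the aperture or 16 MiB, whichever is greater */
	uint64_t start = aperture_start > SZ_16M ? aperture_start : SZ_16M;
	uint64_t size = aperture_end - start + 1;

	printf("base = 0x%llx, size = 0x%llx\n",
	       (unsigned long long)(start & GENMASK_ULL(48, 0)),
	       (unsigned long long)size);
	return 0;
}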
