From 81921a828b94ce2816932c19a5ec74d302972833 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 5 Jun 2020 20:37:43 +0300 Subject: drm/amd/display: Use kvfree() to free coeff in build_regamma() Use kvfree() instead of kfree() to free coeff in build_regamma() because the memory is allocated with kvzalloc(). Fixes: e752058b8671 ("drm/amd/display: Optimize gamma calculations") Cc: stable@vger.kernel.org Signed-off-by: Denis Efremov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 9431b48aecb4..56bb1f9f77ce 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -843,7 +843,7 @@ static bool build_regamma(struct pwl_float_data_ex *rgb_regamma, pow_buffer_ptr = -1; // reset back to no optimize ret = true; release: - kfree(coeff); + kvfree(coeff); return ret; } -- cgit v1.2.3 From 43a562774fceba867e8eebba977d7d42f8a2eac7 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 5 Jun 2020 20:37:44 +0300 Subject: drm/amd/display: Use kfree() to free rgb_user in calculate_user_regamma_ramp() Use kfree() instead of kvfree() to free rgb_user in calculate_user_regamma_ramp() because the memory is allocated with kcalloc(). Signed-off-by: Denis Efremov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 56bb1f9f77ce..bcfe34ef8c28 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1777,7 +1777,7 @@ bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf, kfree(rgb_regamma); rgb_regamma_alloc_fail: - kvfree(rgb_user); + kfree(rgb_user); rgb_user_alloc_fail: return ret; } -- cgit v1.2.3 From 790243d3bf78f9830a3b2ffbca1ed0f528295d48 Mon Sep 17 00:00:00 2001 From: Sandeep Raghuraman Date: Thu, 11 Jun 2020 01:36:26 +0530 Subject: drm/amdgpu: Replace invalid device ID with a valid device ID Initializes Powertune data for a specific Hawaii card by fixing what looks like a typo in the code. The device ID 66B1 is not a supported device ID for this driver, and is not mentioned elsewhere. 67B1 is a valid device ID, and is a Hawaii Pro GPU. I have tested on my R9 390 which has device ID 67B1, and it works fine without problems. Signed-off-by: Sandeep Raghuraman Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 85e5b1ed22c2..56923a96b450 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -239,7 +239,7 @@ static void ci_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) switch (dev_id) { case 0x67BA: - case 0x66B1: + case 0x67B1: smu_data->power_tune_defaults = &defaults_hawaii_pro; break; case 0x67B8: -- cgit v1.2.3 From 9ec051bf4470ee20505c3c1bca9dc441944de4df Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 29 Apr 2020 10:53:02 -0400 Subject: drm/amd/display: Rework dsc to isolate FPU operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we want to use float point operation on Linux we need to use within special kernel protection (`kernel_fpu_{begin,end}()`.), otherwise the kernel can clobber userspace FPU register state. For detecting these issues we use a tool named objtool (with -Ffa flags) to highlight the FPU problems, all warnings can be summed up as follows: ./tools/objtool/objtool check -Ffa drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.o [..] dc/dsc/rc_calc.o: warning: objtool: get_qp_set()+0x2f8: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: dsc_roundf()+0x5: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: dsc_ceil()+0x5: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: get_ofs_set()+0x3eb: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: calc_rc_params()+0x3c: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/dc_dsc.o: warning: objtool: get_dsc_bandwidth_range.isra.0()+0x8d: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/dc_dsc.o: warning: objtool: setup_dsc_config()+0x2ef: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc_dpi.o: warning: objtool:copy_pps_fields()+0xbb: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc_dpi.o: warning: objtool: dscc_compute_dsc_parameters()+0x7b: FPU instruction outside of kernel_fpu_{begin,end}() This commit fixes the above issues by rework DSC as described: 1. Isolate all FPU operations in a single file; 2. Use FPU flags only in the file that handles FPU operations; 3. Isolate all functions that require float point operation in static functions; 4. Add a mid-layer function that does not use any float point operation, and that could be safely invoked in other parts of the code. 5. Keep float point operation under DC_FP_{START/END} macro. CC: Christian König CC: Alexander Deucher CC: Peter Zijlstra CC: Tony Cheng CC: Harry Wentland Signed-off-by: Rodrigo Siqueira Reviewed-by: Mikita Lipski Acked-by: Qingqing Zhuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dsc/Makefile | 2 - drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 18 +-- drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c | 151 ++++++++++++++++++++++- drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h | 5 +- drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c | 27 +--- 5 files changed, 153 insertions(+), 50 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index 3f66868df171..ea29cf95d470 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -28,8 +28,6 @@ endif endif CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc_dpi.o := $(dsc_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dsc/dc_dsc.o := $(dsc_ccflags) DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 0ea6662a1563..0c7f247bb7de 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -22,10 +22,12 @@ * Author: AMD */ +#include #include "dc_hw_types.h" #include "dsc.h" #include #include "dc.h" +#include "rc_calc.h" /* This module's internal functions */ @@ -304,22 +306,6 @@ static inline uint32_t dsc_div_by_10_round_up(uint32_t value) return (value + 9) / 10; } -static inline uint32_t calc_dsc_bpp_x16(uint32_t stream_bandwidth_kbps, uint32_t pix_clk_100hz, uint32_t bpp_increment_div) -{ - uint32_t dsc_target_bpp_x16; - float f_dsc_target_bpp; - float f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f; - uint32_t precision = bpp_increment_div; // bpp_increment_div is actually precision - - f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz; - - // Round down to the nearest precision stop to bring it into DSC spec range - dsc_target_bpp_x16 = (uint32_t)(f_dsc_target_bpp * precision); - dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision; - - return dsc_target_bpp_x16; -} - /* Get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range, and timing's pixel clock * and uncompressed bandwidth. */ diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c index 03ae15946c6d..667afbc260f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c @@ -23,6 +23,7 @@ * Authors: AMD * */ +#include #include "os_types.h" #include "rc_calc.h" @@ -40,7 +41,8 @@ break -void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum max_min max_min, float bpp) +static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, + enum max_min max_min, float bpp) { int mode = MODE_SELECT(444, 422, 420); int sel = table_hash(mode, bpc, max_min); @@ -85,7 +87,7 @@ void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum ma memcpy(qps, table[index].qps, sizeof(qp_set)); } -double dsc_roundf(double num) +static double dsc_roundf(double num) { if (num < 0.0) num = num - 0.5; @@ -95,7 +97,7 @@ double dsc_roundf(double num) return (int)(num); } -double dsc_ceil(double num) +static double dsc_ceil(double num) { double retval = (int)num; @@ -105,7 +107,7 @@ double dsc_ceil(double num) return (int)retval; } -void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp) +static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp) { int *p = ofs; @@ -160,7 +162,7 @@ void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp) } } -int median3(int a, int b, int c) +static int median3(int a, int b, int c) { if (a > b) swap(a, b); @@ -172,13 +174,25 @@ int median3(int a, int b, int c) return b; } -void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version) +static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm, + enum bits_per_comp bpc, u8 drm_bpp, + bool is_navite_422_or_420, + int slice_width, int slice_height, + int minor_version) { + float bpp; float bpp_group; float initial_xmit_delay_factor; int padding_pixels; int i; + bpp = ((float)drm_bpp / 16.0); + /* in native_422 or native_420 modes, the bits_per_pixel is double the + * target bpp (the latter is what calc_rc_params expects) + */ + if (is_navite_422_or_420) + bpp /= 2.0; + rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); @@ -251,3 +265,128 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com rc->rc_buf_thresh[13] = 8064; } +static u32 _do_bytes_per_pixel_calc(int slice_width, u8 drm_bpp, + bool is_navite_422_or_420) +{ + float bpp; + u32 bytes_per_pixel; + double d_bytes_per_pixel; + + bpp = ((float)drm_bpp / 16.0); + d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width; + // TODO: Make sure the formula for calculating this is precise (ceiling + // vs. floor, and at what point they should be applied) + if (is_navite_422_or_420) + d_bytes_per_pixel /= 2; + + bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000); + + return bytes_per_pixel; +} + +static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, + u32 bpp_increment_div) +{ + u32 dsc_target_bpp_x16; + float f_dsc_target_bpp; + float f_stream_bandwidth_100bps; + // bpp_increment_div is actually precision + u32 precision = bpp_increment_div; + + f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f; + f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz; + + // Round down to the nearest precision stop to bring it into DSC spec + // range + dsc_target_bpp_x16 = (u32)(f_dsc_target_bpp * precision); + dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision; + + return dsc_target_bpp_x16; +} + +/** + * calc_rc_params - reads the user's cmdline mode + * @rc: DC internal DSC parameters + * @pps: DRM struct with all required DSC values + * + * This function expects a drm_dsc_config data struct with all the required DSC + * values previously filled out by our driver and based on this information it + * computes some of the DSC values. + * + * @note This calculation requires float point operation, most of it executes + * under kernel_fpu_{begin,end}. + */ +void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps) +{ + enum colour_mode mode; + enum bits_per_comp bpc; + bool is_navite_422_or_420; + u8 drm_bpp = pps->bits_per_pixel; + int slice_width = pps->slice_width; + int slice_height = pps->slice_height; + + mode = pps->convert_rgb ? CM_RGB : (pps->simple_422 ? CM_444 : + (pps->native_422 ? CM_422 : + pps->native_420 ? CM_420 : CM_444)); + bpc = (pps->bits_per_component == 8) ? BPC_8 : (pps->bits_per_component == 10) + ? BPC_10 : BPC_12; + + is_navite_422_or_420 = pps->native_422 || pps->native_420; + + DC_FP_START(); + _do_calc_rc_params(rc, mode, bpc, drm_bpp, is_navite_422_or_420, + slice_width, slice_height, + pps->dsc_version_minor); + DC_FP_END(); +} + +/** + * calc_dsc_bytes_per_pixel - calculate bytes per pixel + * @pps: DRM struct with all required DSC values + * + * Based on the information inside drm_dsc_config, this function calculates the + * total of bytes per pixel. + * + * @note This calculation requires float point operation, most of it executes + * under kernel_fpu_{begin,end}. + * + * Return: + * Return the number of bytes per pixel + */ +u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps) + +{ + u32 ret; + u8 drm_bpp = pps->bits_per_pixel; + int slice_width = pps->slice_width; + bool is_navite_422_or_420 = pps->native_422 || pps->native_420; + + DC_FP_START(); + ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp, + is_navite_422_or_420); + DC_FP_END(); + return ret; +} + +/** + * calc_dsc_bpp_x16 - retrieve the dsc bits per pixel + * @stream_bandwidth_kbps: + * @pix_clk_100hz: + * @bpp_increment_div: + * + * Calculate the total of bits per pixel for DSC configuration. + * + * @note This calculation requires float point operation, most of it executes + * under kernel_fpu_{begin,end}. + */ +u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, + u32 bpp_increment_div) +{ + u32 dsc_bpp; + + DC_FP_START(); + dsc_bpp = _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz, + bpp_increment_div); + DC_FP_END(); + return dsc_bpp; +} diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h index b6b1f09c2009..21723fa6561e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h @@ -77,7 +77,10 @@ struct qp_entry { typedef struct qp_entry qp_table[]; -void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version); +void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps); +u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps); +u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, + u32 bpp_increment_div); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c index 1f6e63b71456..ef830aded5b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c @@ -27,8 +27,6 @@ #include "dscc_types.h" #include "rc_calc.h" -double dsc_ceil(double num); - static void copy_pps_fields(struct drm_dsc_config *to, const struct drm_dsc_config *from) { to->line_buf_depth = from->line_buf_depth; @@ -100,34 +98,13 @@ static void copy_rc_to_cfg(struct drm_dsc_config *dsc_cfg, const struct rc_param int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_parameters *dsc_params) { - enum colour_mode mode = pps->convert_rgb ? CM_RGB : - (pps->simple_422 ? CM_444 : - (pps->native_422 ? CM_422 : - pps->native_420 ? CM_420 : CM_444)); - enum bits_per_comp bpc = (pps->bits_per_component == 8) ? BPC_8 : - (pps->bits_per_component == 10) ? BPC_10 : BPC_12; - float bpp = ((float) pps->bits_per_pixel / 16.0); - int slice_width = pps->slice_width; - int slice_height = pps->slice_height; int ret; struct rc_params rc; struct drm_dsc_config dsc_cfg; - double d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width; - - // TODO: Make sure the formula for calculating this is precise (ceiling vs. floor, and at what point they should be applied) - if (pps->native_422 || pps->native_420) - d_bytes_per_pixel /= 2; - - dsc_params->bytes_per_pixel = (uint32_t)dsc_ceil(d_bytes_per_pixel * 0x10000000); - - /* in native_422 or native_420 modes, the bits_per_pixel is double the target bpp - * (the latter is what calc_rc_params expects) - */ - if (pps->native_422 || pps->native_420) - bpp /= 2.0; + dsc_params->bytes_per_pixel = calc_dsc_bytes_per_pixel(pps); - calc_rc_params(&rc, mode, bpc, bpp, slice_width, slice_height, pps->dsc_version_minor); + calc_rc_params(&rc, pps); dsc_params->pps = *pps; dsc_params->pps.initial_scale_value = 8 * rc.rc_model_size / (rc.rc_model_size - rc.initial_fullness_offset); -- cgit v1.2.3 From 5fddd4f5db873035cbf916495e8a8769f77673bc Mon Sep 17 00:00:00 2001 From: Kalyan Thota Date: Thu, 11 Jun 2020 18:25:15 +0530 Subject: drm/msm/dpu: request for display color blocks based on hw catalog entry Request for color processing blocks only if they are available in the display hw catalog and they are sufficient in number for the selection. Signed-off-by: Kalyan Thota Fixes: e47616df008b ("drm/msm/dpu: add support for color processing Tested-by: John Stultz Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 63976dcd2ac8..9f8de773ed59 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -521,7 +521,7 @@ static struct msm_display_topology dpu_encoder_get_topology( struct dpu_kms *dpu_kms, struct drm_display_mode *mode) { - struct msm_display_topology topology; + struct msm_display_topology topology = {0}; int i, intf_count = 0; for (i = 0; i < MAX_PHYS_ENCODERS_PER_VIRTUAL; i++) @@ -537,7 +537,8 @@ static struct msm_display_topology dpu_encoder_get_topology( * 1 LM, 1 INTF * 2 LM, 1 INTF (stream merge to support high resolution interfaces) * - * Adding color blocks only to primary interface + * Adding color blocks only to primary interface if available in + * sufficient number */ if (intf_count == 2) topology.num_lm = 2; @@ -546,8 +547,11 @@ static struct msm_display_topology dpu_encoder_get_topology( else topology.num_lm = (mode->hdisplay > MAX_HDISPLAY_SPLIT) ? 2 : 1; - if (dpu_enc->disp_info.intf_type == DRM_MODE_ENCODER_DSI) - topology.num_dspp = topology.num_lm; + if (dpu_enc->disp_info.intf_type == DRM_MODE_ENCODER_DSI) { + if (dpu_kms->catalog->dspp && + (dpu_kms->catalog->dspp_count >= topology.num_lm)) + topology.num_dspp = topology.num_lm; + } topology.num_enc = 0; topology.num_intf = intf_count; -- cgit v1.2.3 From 177d3819633cd520e3f95df541a04644aab4c657 Mon Sep 17 00:00:00 2001 From: Bernard Zhao Date: Fri, 12 Jun 2020 09:23:49 +0800 Subject: drm/msm: fix potential memleak in error branch In function msm_submitqueue_create, the queue is a local variable, in return -EINVAL branch, queue didn`t add to ctx`s list yet, and also didn`t kfree, this maybe bring in potential memleak. Signed-off-by: Bernard Zhao [trivial commit msg fixup] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_submitqueue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index 001fbf537440..a1d94be7883a 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -71,8 +71,10 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, queue->flags = flags; if (priv->gpu) { - if (prio >= priv->gpu->nr_rings) + if (prio >= priv->gpu->nr_rings) { + kfree(queue); return -EINVAL; + } queue->prio = prio; } -- cgit v1.2.3 From aa472721c8dbe1713cf510f56ffbc56ae9e14247 Mon Sep 17 00:00:00 2001 From: Chen Tao Date: Mon, 8 Jun 2020 09:48:59 +0800 Subject: drm/msm/dpu: fix error return code in dpu_encoder_init Fix to return negative error code -ENOMEM with the use of ERR_PTR from dpu_encoder_init. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Chen Tao Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 9f8de773ed59..30254741e6ef 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2187,7 +2187,7 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev, dpu_enc = devm_kzalloc(dev->dev, sizeof(*dpu_enc), GFP_KERNEL); if (!dpu_enc) - return ERR_PTR(ENOMEM); + return ERR_PTR(-ENOMEM); rc = drm_encoder_init(dev, &dpu_enc->base, &dpu_encoder_funcs, drm_enc_mode, NULL); -- cgit v1.2.3 From 2e7ec6b5297157efabb50e5f82adc628cf90296c Mon Sep 17 00:00:00 2001 From: Krishna Manikandan Date: Thu, 28 May 2020 14:04:28 +0530 Subject: drm/msm/dpu: allow initialization of encoder locks during encoder init In the current implementation, mutex initialization for encoder mutex locks are done during encoder setup. This can lead to scenarios where the lock is used before it is initialized. Move mutex_init to dpu_encoder_init to avoid this. Signed-off-by: Krishna Manikandan Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 30254741e6ef..0946a86b37b2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2140,7 +2140,6 @@ int dpu_encoder_setup(struct drm_device *dev, struct drm_encoder *enc, dpu_enc = to_dpu_encoder_virt(enc); - mutex_init(&dpu_enc->enc_lock); ret = dpu_encoder_setup_display(dpu_enc, dpu_kms, disp_info); if (ret) goto fail; @@ -2155,7 +2154,6 @@ int dpu_encoder_setup(struct drm_device *dev, struct drm_encoder *enc, 0); - mutex_init(&dpu_enc->rc_lock); INIT_DELAYED_WORK(&dpu_enc->delayed_off_work, dpu_encoder_off_work); dpu_enc->idle_timeout = IDLE_TIMEOUT; @@ -2200,6 +2198,8 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev, spin_lock_init(&dpu_enc->enc_spinlock); dpu_enc->enabled = false; + mutex_init(&dpu_enc->enc_lock); + mutex_init(&dpu_enc->rc_lock); return &dpu_enc->base; } -- cgit v1.2.3 From bc8bd54ffd8513e05eb87b5badcb2d586ea278f0 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Sat, 13 Jun 2020 04:21:59 +0000 Subject: drm/msm: Fix 0xfffflub in "Refactor address space initialization" This week I started seeing GPU crashes on my DragonBoard 845c which I narrowed down to being caused by commit ccac7ce373c1 ("drm/msm: Refactor address space initialization"). Looking through the patch, Jordan and I couldn't find anything obviously wrong, so I ended up breaking that change up into a number of smaller logical steps so I could figure out which part was causing the trouble. Ends up, visually counting 'f's is hard, esp across a number of lines: 0xfffffff != 0xffffffff This patch corrects the end value we pass in to msm_gem_address_space_create() in adreno_iommu_create_address_space() so that it matches the value used before the problematic patch landed. With this change, I no longer see the GPU crashes that were affecting me. Cc: Shawn Guo Cc: Rob Clark Cc: Sean Paul Cc: Jordan Crouse Cc: freedreno@lists.freedesktop.org Fixes: ccac7ce373c1 ("drm/msm: Refactor address space initialization") Signed-off-by: John Stultz Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 89673c7ed473..3d4efe684a98 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -194,7 +194,7 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu, struct msm_gem_address_space *aspace; aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M, - 0xfffffff); + 0xffffffff); if (IS_ERR(aspace) && !IS_ERR(mmu)) mmu->funcs->destroy(mmu); -- cgit v1.2.3 From 37cc4b95d13f311c04aa8e9daacca3905ad45ca7 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Sat, 13 Jun 2020 20:28:38 +0800 Subject: drm/ttm: Fix dma_fence refcnt leak in ttm_bo_vm_fault_reserved MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ttm_bo_vm_fault_reserved() invokes dma_fence_get(), which returns a reference of the specified dma_fence object to "moving" with increased refcnt. When ttm_bo_vm_fault_reserved() returns, local variable "moving" becomes invalid, so the refcount should be decreased to keep refcount balanced. The reference counting issue happens in several exception handling paths of ttm_bo_vm_fault_reserved(). When those error scenarios occur such as "err" equals to -EBUSY, the function forgets to decrease the refcnt increased by dma_fence_get(), causing a refcnt leak. Fix this issue by calling dma_fence_put() when no_wait_gpu flag is equals to true. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/370219/ Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 0ad30b112982..72100b84c7a9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -300,8 +300,10 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, break; case -EBUSY: case -ERESTARTSYS: + dma_fence_put(moving); return VM_FAULT_NOPAGE; default: + dma_fence_put(moving); return VM_FAULT_SIGBUS; } -- cgit v1.2.3 From 11425c4519e2c974a100fc984867046d905b9380 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Sat, 13 Jun 2020 20:30:25 +0800 Subject: drm/ttm: Fix dma_fence refcnt leak when adding move fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ttm_bo_add_move_fence() invokes dma_fence_get(), which returns a reference of the specified dma_fence object to "fence" with increased refcnt. When ttm_bo_add_move_fence() returns, local variable "fence" becomes invalid, so the refcount should be decreased to keep refcount balanced. The reference counting issue happens in one exception handling path of ttm_bo_add_move_fence(). When no_wait_gpu flag is equals to true, the function forgets to decrease the refcnt increased by dma_fence_get(), causing a refcnt leak. Fix this issue by calling dma_fence_put() when no_wait_gpu flag is equals to true. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/370221/ Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9e07c3f75156..ef5bc00c73e2 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -881,8 +881,10 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, if (!fence) return 0; - if (no_wait_gpu) + if (no_wait_gpu) { + dma_fence_put(fence); return -EBUSY; + } dma_resv_add_shared_fence(bo->base.resv, fence); -- cgit v1.2.3 From 8497376707be6d2ab30f6d6662f42ab0c15d2ba1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 14:20:46 +0100 Subject: drm/i915/gt: Incorporate the virtual engine into timeslicing It was quite the oversight to only factor in the normal queue to decide the timeslicing switch priority. By leaving out the next virtual request from the priority decision, we would not timeslice the current engine if there was an available virtual request. Testcase: igt/gem_exec_balancer/sliced Fixes: 3df2deed411e ("drm/i915/execlists: Enable timeslice on partial virtual engine dequeue") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200519132046.22443-3-chris@chris-wilson.co.uk (cherry picked from commit 6ad249ba59badc7ff157d4db1f835748f0e2c9b6) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_lrc.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 87e6c5bdd2dc..0b58e6c1e235 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1895,7 +1895,8 @@ static void defer_active(struct intel_engine_cs *engine) static bool need_timeslice(const struct intel_engine_cs *engine, - const struct i915_request *rq) + const struct i915_request *rq, + const struct rb_node *rb) { int hint; @@ -1903,6 +1904,24 @@ need_timeslice(const struct intel_engine_cs *engine, return false; hint = engine->execlists.queue_priority_hint; + + if (rb) { + const struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + const struct intel_engine_cs *inflight = + intel_context_inflight(&ve->context); + + if (!inflight || inflight == engine) { + struct i915_request *next; + + rcu_read_lock(); + next = READ_ONCE(ve->request); + if (next) + hint = max(hint, rq_prio(next)); + rcu_read_unlock(); + } + } + if (!list_is_last(&rq->sched.link, &engine->active.requests)) hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); @@ -1977,10 +1996,9 @@ static void set_timeslice(struct intel_engine_cs *engine) set_timer_ms(&engine->execlists.timer, duration); } -static void start_timeslice(struct intel_engine_cs *engine) +static void start_timeslice(struct intel_engine_cs *engine, int prio) { struct intel_engine_execlists *execlists = &engine->execlists; - const int prio = queue_prio(execlists); unsigned long duration; if (!intel_engine_has_timeslices(engine)) @@ -2140,7 +2158,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) __unwind_incomplete_requests(engine); last = NULL; - } else if (need_timeslice(engine, last) && + } else if (need_timeslice(engine, last, rb) && timeslice_expired(execlists, last)) { if (i915_request_completed(last)) { tasklet_hi_schedule(&execlists->tasklet); @@ -2188,7 +2206,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - start_timeslice(engine); + start_timeslice(engine, queue_prio(execlists)); return; } } @@ -2223,7 +2241,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - start_timeslice(engine); + start_timeslice(engine, rq_prio(rq)); return; /* leave this for another sibling */ } -- cgit v1.2.3 From 54a9adc4608144c7a17530822246f3cfa9b15d76 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 May 2020 16:05:08 +0200 Subject: drm/i915/pmu: avoid an maybe-uninitialized warning Conditional spinlocks make it hard for gcc and for lockdep to follow the code flow. This one causes a warning with at least gcc-9 and higher: In file included from include/linux/irq.h:14, from drivers/gpu/drm/i915/i915_pmu.c:7: drivers/gpu/drm/i915/i915_pmu.c: In function 'i915_sample': include/linux/spinlock.h:289:3: error: 'flags' may be used uninitialized in this function [-Werror=maybe-uninitialized] 289 | _raw_spin_unlock_irqrestore(lock, flags); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/i915_pmu.c:288:17: note: 'flags' was declared here 288 | unsigned long flags; | ^~~~~ Split out the part between the locks into a separate function for readability and to let the compiler figure out what the logic actually is. Fixes: d79e1bd676f0 ("drm/i915/pmu: Only use exclusive mmio access for gen7") Signed-off-by: Arnd Bergmann Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200527140526.1458215-1-arnd@arndb.de (cherry picked from commit 6ec81b82732e2b4a5ac0853fd33919ff1ca94238) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_pmu.c | 84 ++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index e991a707bdb7..962ded9ce73f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -269,12 +269,48 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915) return IS_GEN(i915, 7); } +static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) +{ + struct intel_engine_pmu *pmu = &engine->pmu; + bool busy; + u32 val; + + val = ENGINE_READ_FW(engine, RING_CTL); + if (val == 0) /* powerwell off => engine idle */ + return; + + if (val & RING_WAIT) + add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); + if (val & RING_WAIT_SEMAPHORE) + add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); + + /* No need to sample when busy stats are supported. */ + if (intel_engine_supports_stats(engine)) + return; + + /* + * While waiting on a semaphore or event, MI_MODE reports the + * ring as idle. However, previously using the seqno, and with + * execlists sampling, we account for the ring waiting as the + * engine being busy. Therefore, we record the sample as being + * busy if either waiting or !idle. + */ + busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); + if (!busy) { + val = ENGINE_READ_FW(engine, RING_MI_MODE); + busy = !(val & MODE_IDLE); + } + if (busy) + add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); +} + static void engines_sample(struct intel_gt *gt, unsigned int period_ns) { struct drm_i915_private *i915 = gt->i915; struct intel_engine_cs *engine; enum intel_engine_id id; + unsigned long flags; if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) return; @@ -283,53 +319,17 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) return; for_each_engine(engine, gt, id) { - struct intel_engine_pmu *pmu = &engine->pmu; - spinlock_t *mmio_lock; - unsigned long flags; - bool busy; - u32 val; - if (!intel_engine_pm_get_if_awake(engine)) continue; - mmio_lock = NULL; - if (exclusive_mmio_access(i915)) - mmio_lock = &engine->uncore->lock; - - if (unlikely(mmio_lock)) - spin_lock_irqsave(mmio_lock, flags); - - val = ENGINE_READ_FW(engine, RING_CTL); - if (val == 0) /* powerwell off => engine idle */ - goto skip; - - if (val & RING_WAIT) - add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); - if (val & RING_WAIT_SEMAPHORE) - add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); - - /* No need to sample when busy stats are supported. */ - if (intel_engine_supports_stats(engine)) - goto skip; - - /* - * While waiting on a semaphore or event, MI_MODE reports the - * ring as idle. However, previously using the seqno, and with - * execlists sampling, we account for the ring waiting as the - * engine being busy. Therefore, we record the sample as being - * busy if either waiting or !idle. - */ - busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); - if (!busy) { - val = ENGINE_READ_FW(engine, RING_MI_MODE); - busy = !(val & MODE_IDLE); + if (exclusive_mmio_access(i915)) { + spin_lock_irqsave(&engine->uncore->lock, flags); + engine_sample(engine, period_ns); + spin_unlock_irqrestore(&engine->uncore->lock, flags); + } else { + engine_sample(engine, period_ns); } - if (busy) - add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); -skip: - if (unlikely(mmio_lock)) - spin_unlock_irqrestore(mmio_lock, flags); intel_engine_pm_put_async(engine); } } -- cgit v1.2.3 From 70cac501b576710b426f3e9f67fcd0a0558df72c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 May 2020 16:05:09 +0200 Subject: drm/i915: work around false-positive maybe-uninitialized warning gcc-9 gets confused by the code flow in check_dirty_whitelist: drivers/gpu/drm/i915/gt/selftest_workarounds.c: In function 'check_dirty_whitelist': drivers/gpu/drm/i915/gt/selftest_workarounds.c:492:17: error: 'rsvd' may be used uninitialized in this function [-Werror=maybe-uninitialized] I could not figure out a good way to do this in a way that gcc understands better, so initialize the variable to zero, as last resort. Fixes: aee20aaed887 ("drm/i915: Implement read-only support in whitelist selftest") Signed-off-by: Arnd Bergmann Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200527140526.1458215-2-arnd@arndb.de (cherry picked from commit cc649a9eafc1ef5c40db023084cb94422d08aa84) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 5ed323254ee1..32785463ec9e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -623,6 +623,8 @@ err_request: err = -EINVAL; goto out_unpin; } + } else { + rsvd = 0; } expect = results[0]; -- cgit v1.2.3 From 3ffbe35321f4671c924a47bb50ca8087f87b2791 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 07:31:14 +0100 Subject: drm/i915/selftests: Restore to default heartbeat Since we temporarily disable the heartbeat and restore back to the default value, we can use the stored defaults on the engine and avoid using a local. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200519063123.20673-3-chris@chris-wilson.co.uk (cherry picked from commit 3a230a554dbbc6cd5016cf1b56ee77cfcd48c7d8) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 25 ++++------ drivers/gpu/drm/i915/gt/selftest_lrc.c | 67 ++++++++++----------------- drivers/gpu/drm/i915/gt/selftest_rps.c | 69 +++++++++++----------------- drivers/gpu/drm/i915/gt/selftest_timeline.c | 15 +++--- 4 files changed, 67 insertions(+), 109 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 2b2efff6e19d..4aa4cc917d8b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -310,22 +310,20 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq) 1000)); } -static void engine_heartbeat_disable(struct intel_engine_cs *engine, - unsigned long *saved) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - *saved = engine->props.heartbeat_interval_ms; engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static int igt_hang_sanitycheck(void *arg) @@ -473,7 +471,6 @@ static int igt_reset_nop_engine(void *arg) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count, count; struct intel_context *ce; - unsigned long heartbeat; IGT_TIMEOUT(end_time); int err; @@ -485,7 +482,7 @@ static int igt_reset_nop_engine(void *arg) reset_engine_count = i915_reset_engine_count(global, engine); count = 0; - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { int i; @@ -529,7 +526,7 @@ static int igt_reset_nop_engine(void *arg) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); @@ -564,7 +561,6 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; - unsigned long heartbeat; IGT_TIMEOUT(end_time); if (active && !intel_engine_can_store_dword(engine)) @@ -580,7 +576,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { if (active) { @@ -632,7 +628,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (err) break; @@ -789,7 +785,6 @@ static int __igt_reset_engines(struct intel_gt *gt, struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; - unsigned long heartbeat; IGT_TIMEOUT(end_time); if (flags & TEST_ACTIVE && @@ -832,7 +827,7 @@ static int __igt_reset_engines(struct intel_gt *gt, yield(); /* start all threads before we begin */ - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; @@ -906,7 +901,7 @@ static int __igt_reset_engines(struct intel_gt *gt, } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 824f99c4cc7c..ba97d1099e27 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -51,22 +51,20 @@ static struct i915_vma *create_scratch(struct intel_gt *gt) return vma; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine, - unsigned long *saved) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - *saved = engine->props.heartbeat_interval_ms; engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static bool is_active(struct i915_request *rq) @@ -224,7 +222,6 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) struct intel_context *ce[2] = {}; struct i915_request *rq[2]; struct igt_live_test t; - unsigned long saved; int n; if (prio && !intel_engine_has_preemption(engine)) @@ -237,7 +234,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) err = -EIO; break; } - engine_heartbeat_disable(engine, &saved); + engine_heartbeat_disable(engine); for (n = 0; n < ARRAY_SIZE(ce); n++) { struct intel_context *tmp; @@ -345,7 +342,7 @@ err_ce: intel_context_put(ce[n]); } - engine_heartbeat_enable(engine, saved); + engine_heartbeat_enable(engine); if (igt_live_test_end(&t)) err = -EIO; if (err) @@ -466,7 +463,6 @@ static int live_hold_reset(void *arg) for_each_engine(engine, gt, id) { struct intel_context *ce; - unsigned long heartbeat; struct i915_request *rq; ce = intel_context_create(engine); @@ -475,7 +471,7 @@ static int live_hold_reset(void *arg) break; } - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); if (IS_ERR(rq)) { @@ -535,7 +531,7 @@ static int live_hold_reset(void *arg) i915_request_put(rq); out: - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); intel_context_put(ce); if (err) break; @@ -580,10 +576,9 @@ static int live_error_interrupt(void *arg) for_each_engine(engine, gt, id) { const struct error_phase *p; - unsigned long heartbeat; int err = 0; - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); for (p = phases; p->error[0] != GOOD; p++) { struct i915_request *client[ARRAY_SIZE(phases->error)]; @@ -682,7 +677,7 @@ out: } } - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (err) { intel_gt_set_wedged(gt); return err; @@ -895,16 +890,14 @@ static int live_timeslice_preempt(void *arg) enum intel_engine_id id; for_each_engine(engine, gt, id) { - unsigned long saved; - if (!intel_engine_has_preemption(engine)) continue; memset(vaddr, 0, PAGE_SIZE); - engine_heartbeat_disable(engine, &saved); + engine_heartbeat_disable(engine); err = slice_semaphore_queue(engine, vma, count); - engine_heartbeat_enable(engine, saved); + engine_heartbeat_enable(engine); if (err) goto err_pin; @@ -1009,7 +1002,6 @@ static int live_timeslice_rewind(void *arg) enum { X = 1, Z, Y }; struct i915_request *rq[3] = {}; struct intel_context *ce; - unsigned long heartbeat; unsigned long timeslice; int i, err = 0; u32 *slot; @@ -1028,7 +1020,7 @@ static int live_timeslice_rewind(void *arg) * Expect execution/evaluation order XZY */ - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); timeslice = xchg(&engine->props.timeslice_duration_ms, 1); slot = memset32(engine->status_page.addr + 1000, 0, 4); @@ -1122,7 +1114,7 @@ err: wmb(); engine->props.timeslice_duration_ms = timeslice; - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); for (i = 0; i < 3; i++) i915_request_put(rq[i]); if (igt_flush_test(gt->i915)) @@ -1202,12 +1194,11 @@ static int live_timeslice_queue(void *arg) .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), }; struct i915_request *rq, *nop; - unsigned long saved; if (!intel_engine_has_preemption(engine)) continue; - engine_heartbeat_disable(engine, &saved); + engine_heartbeat_disable(engine); memset(vaddr, 0, PAGE_SIZE); /* ELSP[0]: semaphore wait */ @@ -1284,7 +1275,7 @@ static int live_timeslice_queue(void *arg) err_rq: i915_request_put(rq); err_heartbeat: - engine_heartbeat_enable(engine, saved); + engine_heartbeat_enable(engine); if (err) break; } @@ -4153,7 +4144,6 @@ static int reset_virtual_engine(struct intel_gt *gt, { struct intel_engine_cs *engine; struct intel_context *ve; - unsigned long *heartbeat; struct igt_spinner spin; struct i915_request *rq; unsigned int n; @@ -4165,15 +4155,9 @@ static int reset_virtual_engine(struct intel_gt *gt, * descendents are not executed while the capture is in progress. */ - heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); - if (!heartbeat) + if (igt_spinner_init(&spin, gt)) return -ENOMEM; - if (igt_spinner_init(&spin, gt)) { - err = -ENOMEM; - goto out_free; - } - ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); @@ -4181,7 +4165,7 @@ static int reset_virtual_engine(struct intel_gt *gt, } for (n = 0; n < nsibling; n++) - engine_heartbeat_disable(siblings[n], &heartbeat[n]); + engine_heartbeat_disable(siblings[n]); rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); if (IS_ERR(rq)) { @@ -4252,13 +4236,11 @@ out_rq: i915_request_put(rq); out_heartbeat: for (n = 0; n < nsibling; n++) - engine_heartbeat_enable(siblings[n], heartbeat[n]); + engine_heartbeat_enable(siblings[n]); intel_context_put(ve); out_spin: igt_spinner_fini(&spin); -out_free: - kfree(heartbeat); return err; } @@ -4932,9 +4914,7 @@ static int live_lrc_gpr(void *arg) return PTR_ERR(scratch); for_each_engine(engine, gt, id) { - unsigned long heartbeat; - - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); err = __live_lrc_gpr(engine, scratch, false); if (err) @@ -4945,7 +4925,7 @@ static int live_lrc_gpr(void *arg) goto err; err: - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -5092,10 +5072,9 @@ static int live_lrc_timestamp(void *arg) */ for_each_engine(data.engine, gt, id) { - unsigned long heartbeat; int i, err = 0; - engine_heartbeat_disable(data.engine, &heartbeat); + engine_heartbeat_disable(data.engine); for (i = 0; i < ARRAY_SIZE(data.ce); i++) { struct intel_context *tmp; @@ -5128,7 +5107,7 @@ static int live_lrc_timestamp(void *arg) } err: - engine_heartbeat_enable(data.engine, heartbeat); + engine_heartbeat_enable(data.engine); for (i = 0; i < ARRAY_SIZE(data.ce); i++) { if (!data.ce[i]) break; diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index 6275d69aa9cc..5049c3dd08a6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -20,24 +20,20 @@ /* Try to isolate the impact of cstates from determing frequency response */ #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */ -static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - unsigned long old; - - old = fetch_and_zero(&engine->props.heartbeat_interval_ms); + engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); - - return old; } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static void dummy_rps_work(struct work_struct *wrk) @@ -246,7 +242,6 @@ int live_rps_clock_interval(void *arg) intel_gt_check_clock_frequency(gt); for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; u32 cycles; u64 dt; @@ -254,13 +249,13 @@ int live_rps_clock_interval(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, MI_NOOP); if (IS_ERR(rq)) { - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); err = PTR_ERR(rq); break; } @@ -271,7 +266,7 @@ int live_rps_clock_interval(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -327,7 +322,7 @@ int live_rps_clock_interval(void *arg) intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (err == 0) { u64 time = intel_gt_pm_interval_to_ns(gt, cycles); @@ -405,7 +400,6 @@ int live_rps_control(void *arg) intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; ktime_t min_dt, max_dt; int f, limit; @@ -414,7 +408,7 @@ int live_rps_control(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, @@ -430,7 +424,7 @@ int live_rps_control(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -440,7 +434,7 @@ int live_rps_control(void *arg) pr_err("%s: could not set minimum frequency [%x], only %x!\n", engine->name, rps->min_freq, read_cagf(rps)); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); show_pstate_limits(rps); err = -EINVAL; break; @@ -457,7 +451,7 @@ int live_rps_control(void *arg) pr_err("%s: could not restore minimum frequency [%x], only %x!\n", engine->name, rps->min_freq, read_cagf(rps)); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); show_pstate_limits(rps); err = -EINVAL; break; @@ -472,7 +466,7 @@ int live_rps_control(void *arg) min_dt = ktime_sub(ktime_get(), min_dt); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n", engine->name, @@ -635,7 +629,6 @@ int live_rps_frequency_cs(void *arg) rps->work.func = dummy_rps_work; for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; struct i915_vma *vma; u32 *cancel, *cntr; @@ -644,14 +637,14 @@ int live_rps_frequency_cs(void *arg) int freq; } min, max; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); vma = create_spin_counter(engine, engine->kernel_context->vm, false, &cancel, &cntr); if (IS_ERR(vma)) { err = PTR_ERR(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); break; } @@ -732,7 +725,7 @@ err_vma: i915_vma_unpin(vma); i915_vma_put(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -778,7 +771,6 @@ int live_rps_frequency_srm(void *arg) rps->work.func = dummy_rps_work; for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; struct i915_vma *vma; u32 *cancel, *cntr; @@ -787,14 +779,14 @@ int live_rps_frequency_srm(void *arg) int freq; } min, max; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); vma = create_spin_counter(engine, engine->kernel_context->vm, true, &cancel, &cntr); if (IS_ERR(vma)) { err = PTR_ERR(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); break; } @@ -874,7 +866,7 @@ err_vma: i915_vma_unpin(vma); i915_vma_put(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -1066,16 +1058,14 @@ int live_rps_interrupt(void *arg) for_each_engine(engine, gt, id) { /* Keep the engine busy with a spinner; expect an UP! */ if (pm_events & GEN6_PM_RP_UP_THRESHOLD) { - unsigned long saved_heartbeat; - intel_gt_pm_wait_for_idle(engine->gt); GEM_BUG_ON(intel_rps_is_active(rps)); - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); err = __rps_up_interrupt(rps, engine, &spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (err) goto out; @@ -1084,15 +1074,13 @@ int live_rps_interrupt(void *arg) /* Keep the engine awake but idle and check for DOWN */ if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) { - unsigned long saved_heartbeat; - - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); intel_rc6_disable(>->rc6); err = __rps_down_interrupt(rps, engine); intel_rc6_enable(>->rc6); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (err) goto out; } @@ -1168,7 +1156,6 @@ int live_rps_power(void *arg) rps->work.func = dummy_rps_work; for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; struct { u64 power; @@ -1178,13 +1165,13 @@ int live_rps_power(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, MI_NOOP); if (IS_ERR(rq)) { - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); err = PTR_ERR(rq); break; } @@ -1195,7 +1182,7 @@ int live_rps_power(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -1208,7 +1195,7 @@ int live_rps_power(void *arg) min.power = measure_power_at(rps, &min.freq); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n", engine->name, diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index c2578a0f2f14..ef1c35073dc0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -751,22 +751,20 @@ out_free: return err; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine, - unsigned long *saved) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - *saved = engine->props.heartbeat_interval_ms; engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static int live_hwsp_rollover_kernel(void *arg) @@ -785,10 +783,9 @@ static int live_hwsp_rollover_kernel(void *arg) struct intel_context *ce = engine->kernel_context; struct intel_timeline *tl = ce->timeline; struct i915_request *rq[3] = {}; - unsigned long heartbeat; int i; - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); if (intel_gt_wait_for_idle(gt, HZ / 2)) { err = -EIO; goto out; @@ -839,7 +836,7 @@ static int live_hwsp_rollover_kernel(void *arg) out: for (i = 0; i < ARRAY_SIZE(rq); i++) i915_request_put(rq[i]); - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (err) break; } -- cgit v1.2.3 From 4178b5a60cefd4721095cce974af7c142f2a6331 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 May 2020 17:24:18 +0100 Subject: drm/i915/gt: Prevent timeslicing into unpreemptable requests We have a I915_REQUEST_NOPREEMPT flag that we set when we must prevent the HW from preempting during the course of this request. We need to honour this flag and protect the HW even if we have a heartbeat request, or other maximum priority barrier, pending. As such, restrict the timeslicing check to avoid preempting into the topmost priority band, leaving the unpreemptable requests in blissful peace running uninterrupted on the HW. v2: Set the I915_PRIORITY_BARRIER to be less than I915_PRIORITY_UNPREEMPTABLE so that we never submit a request (heartbeat or barrier) that can legitimately preempt the current non-premptable request. Fixes: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200527162418.24755-1-chris@chris-wilson.co.uk (cherry picked from commit b72f02d78e4f257761ed003444ae52083f962076) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + drivers/gpu/drm/i915/gt/selftest_lrc.c | 118 ++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_priolist_types.h | 2 +- 3 files changed, 119 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0b58e6c1e235..1858331e423e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1925,6 +1925,7 @@ need_timeslice(const struct intel_engine_cs *engine, if (!list_is_last(&rq->sched.link, &engine->active.requests)) hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); + GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE); return hint >= effective_prio(rq); } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index ba97d1099e27..924bc01ef526 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -823,7 +823,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, } } - err = release_queue(outer, vma, n, INT_MAX); + err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); if (err) goto out; @@ -1289,6 +1289,121 @@ err_obj: return err; } +static int live_timeslice_nopreempt(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * We should not timeslice into a request that is marked with + * I915_REQUEST_NOPREEMPT. + */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + struct i915_request *rq; + unsigned long timeslice; + + if (!intel_engine_has_preemption(engine)) + continue; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + engine_heartbeat_disable(engine); + timeslice = xchg(&engine->props.timeslice_duration_ms, 1); + + /* Create an unpreemptible spinner */ + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_heartbeat; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + i915_request_put(rq); + err = -ETIME; + goto out_spin; + } + + set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); + i915_request_put(rq); + + /* Followed by a maximum priority barrier (heartbeat) */ + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(rq); + goto out_spin; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_spin; + } + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_get(rq); + i915_request_add(rq); + + /* + * Wait until the barrier is in ELSP, and we know timeslicing + * will have been activated. + */ + if (wait_for_submit(engine, rq, HZ / 2)) { + i915_request_put(rq); + err = -ETIME; + goto out_spin; + } + + /* + * Since the ELSP[0] request is unpreemptible, it should not + * allow the maximum priority barrier through. Wait long + * enough to see if it is timesliced in by mistake. + */ + if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) { + pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", + engine->name); + err = -EINVAL; + } + i915_request_put(rq); + +out_spin: + igt_spinner_end(&spin); +out_heartbeat: + xchg(&engine->props.timeslice_duration_ms, timeslice); + engine_heartbeat_enable(engine); + if (err) + break; + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + } + + igt_spinner_fini(&spin); + return err; +} + static int live_busywait_preempt(void *arg) { struct intel_gt *gt = arg; @@ -4296,6 +4411,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_timeslice_preempt), SUBTEST(live_timeslice_rewind), SUBTEST(live_timeslice_queue), + SUBTEST(live_timeslice_nopreempt), SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), SUBTEST(live_late_preempt), diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 5003a71113cb..8aa7866ec6b6 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -42,7 +42,7 @@ enum { * active request. */ #define I915_PRIORITY_UNPREEMPTABLE INT_MAX -#define I915_PRIORITY_BARRIER INT_MAX +#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1) struct i915_priolist { struct list_head requests[I915_PRIORITY_COUNT]; -- cgit v1.2.3 From a43555ac908c604f45ed98628805aec9355b9737 Mon Sep 17 00:00:00 2001 From: Khaled Almahallawy Date: Mon, 8 Jun 2020 13:45:37 -0700 Subject: drm/i915/tc: fix the reset of ln0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting ln0 similar to ln1 Fixes: 3b51be4e4061b ("drm/i915/tc: Update DP_MODE programming") Cc: # v5.5+ Signed-off-by: Khaled Almahallawy Reviewed-by: José Roberto de Souza Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20200608204537.28468-1-khaled.almahallawy@intel.com (cherry picked from commit 4f72a8ee819d57d7329d88f487a2fc9b45153177) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index aa22465bb56e..f53de9664903 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2987,7 +2987,7 @@ icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, ln1 = intel_de_read(dev_priv, MG_DP_MODE(1, tc_port)); } - ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE); + ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); /* DPPATC */ -- cgit v1.2.3 From 8ab3a3812aa90e488813e719308ffd807b865624 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Jun 2020 16:17:23 +0100 Subject: drm/i915/gt: Incrementally check for rewinding In commit 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL"), we placed the check for rewinding a context on actually submitting the next request in that context. This was so that we only had to check once, and could do so with precision avoiding as many forced restores as possible. For example, to ensure that we can resubmit the same request a couple of times, we include a small wa_tail such that on the next submission, the ring->tail will appear to move forwards when resubmitting the same request. This is very common as it will happen for every lite-restore to fill the second port after a context switch. However, intel_ring_direction() is limited in precision to movements of upto half the ring size. The consequence being that if we tried to unwind many requests, we could exceed half the ring and flip the sense of the direction, so missing a force restore. As no request can be greater than half the ring (i.e. 2048 bytes in the smallest case), we can check for rollback incrementally. As we check against the tail that would be submitted, we do not lose any sensitivity and allow lite restores for the simple case. We still need to double check upon submitting the context, to allow for multiple preemptions and resubmissions. Fixes: 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v5.4+ Reviewed-by: Bruce Chang Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200609151723.12971-1-chris@chris-wilson.co.uk (cherry picked from commit e36ba817fa966f81fb1c8d16f3721b5a644b2fa9) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 21 +++- drivers/gpu/drm/i915/gt/intel_ring.c | 4 + drivers/gpu/drm/i915/gt/selftest_mocs.c | 18 +++- drivers/gpu/drm/i915/gt/selftest_ring.c | 110 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + 6 files changed, 154 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/selftest_ring.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index da5b61085257..8691eb61e185 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -646,7 +646,7 @@ static int engine_setup_common(struct intel_engine_cs *engine) struct measure_breadcrumb { struct i915_request rq; struct intel_ring ring; - u32 cs[1024]; + u32 cs[2048]; }; static int measure_breadcrumb_dw(struct intel_context *ce) @@ -668,6 +668,8 @@ static int measure_breadcrumb_dw(struct intel_context *ce) frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); + frame->ring.wrap = + BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size); frame->ring.effective_size = frame->ring.size; intel_ring_update_space(&frame->ring); frame->rq.ring = &frame->ring; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 1858331e423e..7c3d8ef4a47c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1134,6 +1134,13 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + /* Check in case we rollback so far we wrap [size/2] */ + if (intel_ring_direction(rq->ring, + intel_ring_wrap(rq->ring, + rq->tail), + rq->ring->tail) > 0) + rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; + active = rq; } else { struct intel_engine_cs *owner = rq->context->engine; @@ -1498,8 +1505,9 @@ static u64 execlists_update_context(struct i915_request *rq) * HW has a tendency to ignore us rewinding the TAIL to the end of * an earlier request. */ + GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail); + prev = rq->ring->tail; tail = intel_ring_set_tail(rq->ring, rq->tail); - prev = ce->lrc_reg_state[CTX_RING_TAIL]; if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) desc |= CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state[CTX_RING_TAIL] = tail; @@ -4758,6 +4766,14 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode) return 0; } +static void assert_request_valid(struct i915_request *rq) +{ + struct intel_ring *ring __maybe_unused = rq->ring; + + /* Can we unwind this request without appearing to go forwards? */ + GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0); +} + /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -4770,6 +4786,9 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); + /* Check that entire request is less than half the ring */ + assert_request_valid(request); + return cs; } diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 8cda1b7e17ba..bdb324167ef3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -315,3 +315,7 @@ int intel_ring_cacheline_align(struct i915_request *rq) GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_ring.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index 8831ffee2061..63f87d8608c3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -18,6 +18,20 @@ struct live_mocs { void *vaddr; }; +static struct intel_context *mocs_context_create(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return ce; + + /* We build large requests to read the registers from the ring */ + ce->ring = __intel_context_ring_size(SZ_16K); + + return ce; +} + static int request_add_sync(struct i915_request *rq, int err) { i915_request_get(rq); @@ -301,7 +315,7 @@ static int live_mocs_clean(void *arg) for_each_engine(engine, gt, id) { struct intel_context *ce; - ce = intel_context_create(engine); + ce = mocs_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); break; @@ -395,7 +409,7 @@ static int live_mocs_reset(void *arg) for_each_engine(engine, gt, id) { struct intel_context *ce; - ce = intel_context_create(engine); + ce = mocs_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); break; diff --git a/drivers/gpu/drm/i915/gt/selftest_ring.c b/drivers/gpu/drm/i915/gt/selftest_ring.c new file mode 100644 index 000000000000..2a8c534dc125 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_ring.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2020 Intel Corporation + */ + +static struct intel_ring *mock_ring(unsigned long sz) +{ + struct intel_ring *ring; + + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); + if (!ring) + return NULL; + + kref_init(&ring->ref); + ring->size = sz; + ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(sz); + ring->effective_size = sz; + ring->vaddr = (void *)(ring + 1); + atomic_set(&ring->pin_count, 1); + + intel_ring_update_space(ring); + + return ring; +} + +static void mock_ring_free(struct intel_ring *ring) +{ + kfree(ring); +} + +static int check_ring_direction(struct intel_ring *ring, + u32 next, u32 prev, + int expected) +{ + int result; + + result = intel_ring_direction(ring, next, prev); + if (result < 0) + result = -1; + else if (result > 0) + result = 1; + + if (result != expected) { + pr_err("intel_ring_direction(%u, %u):%d != %d\n", + next, prev, result, expected); + return -EINVAL; + } + + return 0; +} + +static int check_ring_step(struct intel_ring *ring, u32 x, u32 step) +{ + u32 prev = x, next = intel_ring_wrap(ring, x + step); + int err = 0; + + err |= check_ring_direction(ring, next, next, 0); + err |= check_ring_direction(ring, prev, prev, 0); + err |= check_ring_direction(ring, next, prev, 1); + err |= check_ring_direction(ring, prev, next, -1); + + return err; +} + +static int check_ring_offset(struct intel_ring *ring, u32 x, u32 step) +{ + int err = 0; + + err |= check_ring_step(ring, x, step); + err |= check_ring_step(ring, intel_ring_wrap(ring, x + 1), step); + err |= check_ring_step(ring, intel_ring_wrap(ring, x - 1), step); + + return err; +} + +static int igt_ring_direction(void *dummy) +{ + struct intel_ring *ring; + unsigned int half = 2048; + int step, err = 0; + + ring = mock_ring(2 * half); + if (!ring) + return -ENOMEM; + + GEM_BUG_ON(ring->size != 2 * half); + + /* Precision of wrap detection is limited to ring->size / 2 */ + for (step = 1; step < half; step <<= 1) { + err |= check_ring_offset(ring, 0, step); + err |= check_ring_offset(ring, half, step); + } + err |= check_ring_step(ring, 0, half - 64); + + /* And check unwrapped handling for good measure */ + err |= check_ring_offset(ring, 0, 2 * half + 64); + err |= check_ring_offset(ring, 3 * half, 1); + + mock_ring_free(ring); + return err; +} + +int intel_ring_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ring_direction), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 6a2be7d0dd95..6090ce35226b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -21,6 +21,7 @@ selftest(fence, i915_sw_fence_mock_selftests) selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) +selftest(ring, intel_ring_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) selftest(timelines, intel_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) -- cgit v1.2.3 From 898e4e57adaeeefcfcf23f5d83360e513b69172f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 10 Jun 2020 01:06:16 +0300 Subject: drm/i915/icl: Disable DIP on MST ports with the transcoder clock still on According to BSpec the Data Island Packet should be disabled after disabling the transcoder, but before the transcoder clock select is set to none. On an ICL RVP, daisy-chained MST config not following this leads to a hang with the following MCE when disabling the output: [ 870.948739] mce: [Hardware Error]: CPU 0: Machine Check Exception: 5 Bank 6: ba00000011000402 [ 871.019212] mce: [Hardware Error]: RIP !INEXACT! 10: {poll_idle+0x92/0xb0} [ 871.019212] mce: [Hardware Error]: TSC 135a261fe61 [ 871.019212] mce: [Hardware Error]: PROCESSOR 0:706e5 TIME 1591739604 SOCKET 0 APIC 0 microcode 20 [ 871.019212] mce: [Hardware Error]: Run the above through 'mcelog --ascii' [ 871.019212] mce: [Hardware Error]: Machine check: Processor context corrupt [ 871.019212] Kernel panic - not syncing: Fatal machine check [ 871.019212] Kernel Offset: disabled Bspec: 4287 Fixes: fa37a213275c ("drm/i915: Stop sending DP SDPs on ddi disable") Cc: Gwan-gyeong Mun Cc: Uma Shankar Signed-off-by: Imre Deak Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20200609220616.6015-1-imre.deak@intel.com (cherry picked from commit c980216dd224c52b5c70172753c209b653d84958) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_ddi.c | 4 +++- drivers/gpu/drm/i915/display/intel_dp_mst.c | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index f53de9664903..f92c5ed3f5d2 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3472,7 +3472,9 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state, INTEL_OUTPUT_DP_MST); enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - intel_dp_set_infoframes(encoder, false, old_crtc_state, old_conn_state); + if (!is_mst) + intel_dp_set_infoframes(encoder, false, + old_crtc_state, old_conn_state); /* * Power down sink before disabling the port, otherwise we end diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index d18b406f2a7d..f29e51ce489c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -397,6 +397,14 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, */ drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, false); + + /* + * BSpec 4287: disable DIP after the transcoder is disabled and before + * the transcoder clock select is set to none. + */ + if (last_mst_stream) + intel_dp_set_infoframes(&intel_dig_port->base, false, + old_crtc_state, NULL); /* * From TGL spec: "If multi-stream slave transcoder: Configure * Transcoder Clock Select to direct no clock to the transcoder" -- cgit v1.2.3 From ef50fa9bd17d13d0611e39e13b37bbd3e1ea50bf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 10:30:15 +0100 Subject: drm/i915/gt: Move hsw GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. v2: Leave HSW_SCRATCH to set an explicit value, not or in our disable bit. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2011 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611093015.11370-1-chris@chris-wilson.co.uk (cherry picked from commit f93ec5fb563779bda4501890b1854526de58e0f1) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 48 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 39 ++--------------------- 2 files changed, 50 insertions(+), 37 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 90a2b9e399b0..b5d0fbe3b211 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -178,6 +178,12 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) wa_write_masked_or(wal, reg, set, set); } +static void +wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) +{ + wa_write_masked_or(wal, reg, clr, 0); +} + static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { @@ -686,6 +692,46 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* L3 caching of data atomics doesn't work -- disable it. */ + wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); + + wa_add(wal, + HSW_ROW_CHICKEN3, 0, + _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), + 0 /* XXX does this reg exist? */); + + /* WaVSRefCountFullforceMissDisable:hsw */ + wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); + + wa_masked_dis(wal, + CACHE_MODE_0_GEN7, + /* WaDisable_RenderCache_OperationalFlush:hsw */ + RC_OP_FLUSH_ENABLE | + /* enable HiZ Raw Stall Optimization */ + HIZ_RAW_STALL_OPT_DISABLE); + + /* WaDisable4x2SubspanOptimization:hsw */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + /* WaSampleCChickenBitEnable:hsw */ + wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); +} + static void gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -963,6 +1009,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) bxt_gt_workarounds_init(i915, wal); else if (IS_SKYLAKE(i915)) skl_gt_workarounds_init(i915, wal); + else if (IS_HASWELL(i915)) + hsw_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 696491d71a1d..0b7a4c5f179d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7230,45 +7230,10 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { - /* L3 caching of data atomics doesn't work -- disable it. */ - I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); - I915_WRITE(HSW_ROW_CHICKEN3, - _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); - /* This is required by WaCatErrorRejectionIssue:hsw */ I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - - /* WaVSRefCountFullforceMissDisable:hsw */ - I915_WRITE(GEN7_FF_THREAD_MODE, - I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); - - /* WaDisable_RenderCache_OperationalFlush:hsw */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - - /* WaDisable4x2SubspanOptimization:hsw */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - /* WaSampleCChickenBitEnable:hsw */ - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE)); + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); -- cgit v1.2.3 From 7237b190add0794bd95979018a23eda698f2705d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:36 +0100 Subject: drm/i915/gt: Move ivb GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-2-chris@chris-wilson.co.uk (cherry picked from commit 19f1f627b33385a2f0855cbc7d33d86d7f4a1e78) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 62 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 48 ---------------------- 3 files changed, 63 insertions(+), 49 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b5d0fbe3b211..d9b72679ca82 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -692,6 +692,66 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableEarlyCull:ivb */ + wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); + + /* WaDisablePSDDualDispatchEnable:ivb */ + if (IS_IVB_GT1(i915)) + wa_masked_en(wal, + GEN7_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + + /* WaDisable_RenderCache_OperationalFlush:ivb */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ + wa_masked_dis(wal, + GEN7_COMMON_SLICE_CHICKEN1, + GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); + + /* WaApplyL3ControlAndL3ChickenMode:ivb */ + wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL); + wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); + + /* WaForceL3Serialization:ivb */ + wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); + + /* + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. + */ + wa_write_masked_or(wal, GEN7_FF_THREAD_MODE, + GEN7_FF_SCHED_MASK, + GEN7_FF_TS_SCHED_HW | + GEN7_FF_VS_SCHED_HW | + GEN7_FF_DS_SCHED_HW); + + if (0) { /* causes HiZ corruption on ivb:gt1 */ + /* enable HiZ Raw Stall Optimization */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + } + + /* WaDisable4x2SubspanOptimization:ivb */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); +} + static void hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1011,6 +1071,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) skl_gt_workarounds_init(i915, wal); else if (IS_HASWELL(i915)) hsw_gt_workarounds_init(i915, wal); + else if (IS_IVYBRIDGE(i915)) + ivb_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7717581350bd..06cd1d28a176 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7896,7 +7896,7 @@ enum { /* GEN7 chicken */ #define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) - #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1 << 10) | (1 << 26)) + #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC (1 << 10) #define GEN9_RHWO_OPTIMIZATION_DISABLE (1 << 14) #define COMMON_SLICE_CHICKEN2 _MMIO(0x7014) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0b7a4c5f179d..7236644af40b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7247,32 +7247,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); - /* WaDisableEarlyCull:ivb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - /* WaDisableBackToBackFlipFix:ivb */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); - /* WaDisablePSDDualDispatchEnable:ivb */ - if (IS_IVB_GT1(dev_priv)) - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - - /* WaDisable_RenderCache_OperationalFlush:ivb */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ - I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, - GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); - - /* WaApplyL3ControlAndL3ChickenMode:ivb */ - I915_WRITE(GEN7_L3CNTLREG1, - GEN7_WA_FOR_GEN7_L3_CONTROL); - I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, - GEN7_WA_L3_CHICKEN_MODE); if (IS_IVB_GT1(dev_priv)) I915_WRITE(GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); @@ -7284,10 +7263,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); } - /* WaForceL3Serialization:ivb */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); - /* * According to the spec, bit 13 (RCZUNIT) must be set on IVB. * This implements the WaDisableRCZUnitClockGating:ivb workaround. @@ -7302,29 +7277,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) g4x_disable_trickle_feed(dev_priv); - gen7_setup_fixed_func_scheduler(dev_priv); - - if (0) { /* causes HiZ corruption on ivb:gt1 */ - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - } - - /* WaDisable4x2SubspanOptimization:ivb */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); snpcr &= ~GEN6_MBC_SNPCR_MASK; snpcr |= GEN6_MBC_SNPCR_MED; -- cgit v1.2.3 From 695a2b11649e99bbf15d278042247042c42b8728 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:37 +0100 Subject: drm/i915/gt: Move vlv GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-3-chris@chris-wilson.co.uk (cherry picked from commit 7331c356b6d2d8a01422cacab27478a1dba9fa2a) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 59 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 61 ----------------------------- 2 files changed, 59 insertions(+), 61 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index d9b72679ca82..9923ff1a3982 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -752,6 +752,63 @@ ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN6_WIZ_HASHING_16x4); } +static void +vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableEarlyCull:vlv */ + wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); + + /* WaPsdDispatchEnable:vlv */ + /* WaDisablePSDDualDispatchEnable:vlv */ + wa_masked_en(wal, + GEN7_HALF_SLICE_CHICKEN1, + GEN7_MAX_PS_THREAD_DEP | + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + + /* WaDisable_RenderCache_OperationalFlush:vlv */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* WaForceL3Serialization:vlv */ + wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); + + /* + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. + */ + wa_write_masked_or(wal, + GEN7_FF_THREAD_MODE, + GEN7_FF_SCHED_MASK, + GEN7_FF_TS_SCHED_HW | + GEN7_FF_VS_SCHED_HW | + GEN7_FF_DS_SCHED_HW); + + /* + * BSpec says this must be set, even though + * WaDisable4x2SubspanOptimization isn't listed for VLV. + */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + /* + * WaIncreaseL3CreditsForVLVB0:vlv + * This is the hardware default actually. + */ + wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); +} + static void hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1071,6 +1128,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) skl_gt_workarounds_init(i915, wal); else if (IS_HASWELL(i915)) hsw_gt_workarounds_init(i915, wal); + else if (IS_VALLEYVIEW(i915)) + vlv_gt_workarounds_init(i915, wal); else if (IS_IVYBRIDGE(i915)) ivb_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7236644af40b..cea7923c9cd6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6986,24 +6986,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) gen6_check_mch_setup(dev_priv); } -static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) -{ - u32 reg = I915_READ(GEN7_FF_THREAD_MODE); - - /* - * WaVSThreadDispatchOverride:ivb,vlv - * - * This actually overrides the dispatch - * mode for all thread types. - */ - reg &= ~GEN7_FF_SCHED_MASK; - reg |= GEN7_FF_TS_SCHED_HW; - reg |= GEN7_FF_VS_SCHED_HW; - reg |= GEN7_FF_DS_SCHED_HW; - - I915_WRITE(GEN7_FF_THREAD_MODE, reg); -} - static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) { /* @@ -7290,28 +7272,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) { - /* WaDisableEarlyCull:vlv */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - /* WaDisableBackToBackFlipFix:vlv */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); - /* WaPsdDispatchEnable:vlv */ - /* WaDisablePSDDualDispatchEnable:vlv */ - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - - /* WaDisable_RenderCache_OperationalFlush:vlv */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* WaForceL3Serialization:vlv */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); - /* WaDisableDopClockGating:vlv */ I915_WRITE(GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); @@ -7321,8 +7286,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - gen7_setup_fixed_func_scheduler(dev_priv); - /* * According to the spec, bit 13 (RCZUNIT) must be set on IVB. * This implements the WaDisableRCZUnitClockGating:vlv workaround. @@ -7336,30 +7299,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_UCGCTL4, I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); - /* - * BSpec says this must be set, even though - * WaDisable4x2SubspanOptimization isn't listed for VLV. - */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - /* - * WaIncreaseL3CreditsForVLVB0:vlv - * This is the hardware default actually. - */ - I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); - /* * WaDisableVLVClockGating_VBIIssue:vlv * Disable clock gating on th GCFG unit to prevent a delay -- cgit v1.2.3 From fd2599bda5a989c3332f4956fd7760ec32bd51ee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:38 +0100 Subject: drm/i915/gt: Move snb GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-4-chris@chris-wilson.co.uk (cherry picked from commit c3b93a943f2c9ee4a106db100a2fc3b2f126bfc5) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 41 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 33 ----------------------- 2 files changed, 41 insertions(+), 33 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 9923ff1a3982..c2d57f65b147 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -692,6 +692,45 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ + wa_masked_en(wal, + _3D_CHICKEN, + _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB); + + /* WaDisable_RenderCache_OperationalFlush:snb */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, + GEN6_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB); + + wa_masked_en(wal, + _3D_CHICKEN3, + /* WaStripsFansDisableFastClipPerformanceFix:snb */ + _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL | + /* + * Bspec says: + * "This bit must be set if 3DSTATE_CLIP clip mode is set + * to normal and 3DSTATE_SF number of SF output attributes + * is more than 16." + */ + _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH); +} + static void ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1132,6 +1171,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) vlv_gt_workarounds_init(i915, wal); else if (IS_IVYBRIDGE(i915)) ivb_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 6)) + snb_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cea7923c9cd6..5db0ebe5eee0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6902,27 +6902,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); - /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ - I915_WRITE(_3D_CHICKEN, - _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); - - /* WaDisable_RenderCache_OperationalFlush:snb */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* - * BSpec recoomends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN6_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | GEN6_BLBUNIT_CLOCK_GATE_DISABLE | @@ -6945,18 +6924,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | GEN6_RCCUNIT_CLOCK_GATE_DISABLE); - /* WaStripsFansDisableFastClipPerformanceFix:snb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); - - /* - * Bspec says: - * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and - * 3DSTATE_SF number of SF output attributes is more than 16." - */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); - /* * According to the spec the following bits should be * set in order to enable memory self-refresh and fbc: -- cgit v1.2.3 From eacf21040aa97fd1b3c6bb201bfd43820e1c49be Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:39 +0100 Subject: drm/i915/gt: Move ilk GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-5-chris@chris-wilson.co.uk (cherry picked from commit 806a45c0838d253e306a6384057e851b65d11099) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 14 ++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 10 ---------- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index c2d57f65b147..5ccfe36c2978 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -692,6 +692,18 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); + + /* WaDisableRenderCachePipelinedFlush:ilk */ + wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); + + /* WaDisable_RenderCache_OperationalFlush:ilk */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); +} + static void snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1173,6 +1185,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) ivb_gt_workarounds_init(i915, wal); else if (IS_GEN(i915, 6)) snb_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 5)) + ilk_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5db0ebe5eee0..7ebe4fa3a162 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6830,16 +6830,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(ILK_DISPLAY_CHICKEN2, I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); - I915_WRITE(_3D_CHICKEN2, - _3D_CHICKEN2_WM_READ_PIPELINED << 16 | - _3D_CHICKEN2_WM_READ_PIPELINED); - - /* WaDisableRenderCachePipelinedFlush:ilk */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:ilk */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); g4x_disable_trickle_feed(dev_priv); -- cgit v1.2.3 From 27582a9c917940bc71c0df0b8e022cbde8d735d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:40 +0100 Subject: drm/i915/gt: Move gen4 GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-6-chris@chris-wilson.co.uk (cherry picked from commit 2bcefd0d263ab4a72f0d61921ae6b0dc81606551) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 ++++++++++++++++++++++----- drivers/gpu/drm/i915/intel_pm.c | 15 --------------- 2 files changed, 22 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 5ccfe36c2978..85d2bef51524 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -693,15 +693,28 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) } static void -ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +gen4_gt_workarounds_init(struct drm_i915_private *i915, + struct i915_wa_list *wal) { - wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); + /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); +} + +static void +g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + gen4_gt_workarounds_init(i915, wal); - /* WaDisableRenderCachePipelinedFlush:ilk */ + /* WaDisableRenderCachePipelinedFlush:g4x,ilk */ wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); +} - /* WaDisable_RenderCache_OperationalFlush:ilk */ - wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); +static void +ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + g4x_gt_workarounds_init(i915, wal); + + wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); } static void @@ -1187,6 +1200,10 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) snb_gt_workarounds_init(i915, wal); else if (IS_GEN(i915, 5)) ilk_gt_workarounds_init(i915, wal); + else if (IS_G4X(i915)) + g4x_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 4)) + gen4_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7ebe4fa3a162..07f663cd2d1c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7308,13 +7308,6 @@ static void g4x_init_clock_gating(struct drm_i915_private *dev_priv) dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; I915_WRITE(DSPCLK_GATE_D, dspclk_gate); - /* WaDisableRenderCachePipelinedFlush */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:g4x */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - g4x_disable_trickle_feed(dev_priv); } @@ -7330,11 +7323,6 @@ static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(uncore, MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:gen4 */ - intel_uncore_write(uncore, - CACHE_MODE_0, - _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) @@ -7347,9 +7335,6 @@ static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(RENCLK_GATE_D2, 0); I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:gen4 */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } static void gen3_init_clock_gating(struct drm_i915_private *dev_priv) -- cgit v1.2.3 From a3005c2edf7e8c3478880db1ca84028a2b6819bb Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 12 Jun 2020 15:17:31 +0300 Subject: drm/i915/icl+: Fix hotplug interrupt disabling after storm detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm, hotplug interrupts on TypeC ports are left enabled after detecting an interrupt storm, fix this. Reported-by: Kunal Joshi References: https://gitlab.freedesktop.org/drm/intel/-/issues/351 Bugzilla: https://gitlab.freedesktop.org/drm/intel/-/issues/1964 Cc: Kunal Joshi Cc: stable@vger.kernel.org Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200612121731.19596-1-imre.deak@intel.com (cherry picked from commit 587a87b9d7e94927edcdea018565bc1939381eb1) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_irq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4dc601dffc08..284cf078135a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3125,6 +3125,7 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) val = I915_READ(GEN11_DE_HPD_IMR); val &= ~hotplug_irqs; + val |= ~enabled_irqs & hotplug_irqs; I915_WRITE(GEN11_DE_HPD_IMR, val); POSTING_READ(GEN11_DE_HPD_IMR); -- cgit v1.2.3 From 8e68c6340d5833077b3753eabedab40755571383 Mon Sep 17 00:00:00 2001 From: Vandita Kulkarni Date: Fri, 12 Jun 2020 13:52:37 +0530 Subject: drm/i915/display: Fix the encoder type check For all ddi, encoder->type holds output type as ddi, assigning it to individual o/p types is no more valid. Fixes: 362bfb995b78 ("drm/i915/tgl: Add DKL PHY vswing table for HDMI") v2: Rebase, no functional change. Signed-off-by: Vandita Kulkarni Reviewed-by: Uma Shankar Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20200612082237.11886-1-vandita.kulkarni@intel.com (cherry picked from commit 94641eb6c69682884abbecf22fe5b7c185af6a06) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_ddi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index f92c5ed3f5d2..0575a1eea2a1 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2579,14 +2579,14 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, static void tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, - u32 level) + u32 level, enum intel_output_type type) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; - if (encoder->type == INTEL_OUTPUT_HDMI) { + if (type == INTEL_OUTPUT_HDMI) { n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); ddi_translations = tgl_dkl_phy_hdmi_ddi_trans; } else { @@ -2638,7 +2638,7 @@ static void tgl_ddi_vswing_sequence(struct intel_encoder *encoder, if (intel_phy_is_combo(dev_priv, phy)) icl_combo_phy_ddi_vswing_sequence(encoder, level, type); else - tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level); + tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level, type); } static u32 translate_signal_level(struct intel_dp *intel_dp, int signal_levels) -- cgit v1.2.3 From 109be8b23fb2ec8e2d309325ee3b7a49eab63961 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Apr 2020 21:16:30 +0200 Subject: gpu: host1x: Clean up debugfs in error handling path host1x_debug_init() must be reverted in an error handling path. This is already fixed in the remove function since commit 44156eee91ba ("gpu: host1x: Clean up debugfs on removal") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding --- drivers/gpu/host1x/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index d24344e91922..3c0f151847ba 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -468,11 +468,12 @@ static int host1x_probe(struct platform_device *pdev) err = host1x_register(host); if (err < 0) - goto deinit_intr; + goto deinit_debugfs; return 0; -deinit_intr: +deinit_debugfs: + host1x_debug_deinit(host); host1x_intr_deinit(host); deinit_syncpt: host1x_syncpt_deinit(host); -- cgit v1.2.3 From ef4e417eb3ec7fe657928f10ac1d2154d8a5fb38 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 19 May 2020 02:03:01 -0700 Subject: drm/tegra: hub: Do not enable orphaned window group Though the unconditional enable/disable code is not a final solution, we don't want to run into a NULL pointer situation when window group doesn't link to its DC parent if the DC is disabled in Device Tree. So this patch simply adds a check to make sure that window group has a valid parent before running into tegra_windowgroup_enable/disable. Signed-off-by: Nicolin Chen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/hub.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 8183e617bf6b..a2ef8f218d4e 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -149,7 +149,9 @@ int tegra_display_hub_prepare(struct tegra_display_hub *hub) for (i = 0; i < hub->soc->num_wgrps; i++) { struct tegra_windowgroup *wgrp = &hub->wgrps[i]; - tegra_windowgroup_enable(wgrp); + /* Skip orphaned window group whose parent DC is disabled */ + if (wgrp->parent) + tegra_windowgroup_enable(wgrp); } return 0; @@ -166,7 +168,9 @@ void tegra_display_hub_cleanup(struct tegra_display_hub *hub) for (i = 0; i < hub->soc->num_wgrps; i++) { struct tegra_windowgroup *wgrp = &hub->wgrps[i]; - tegra_windowgroup_disable(wgrp); + /* Skip orphaned window group whose parent DC is disabled */ + if (wgrp->parent) + tegra_windowgroup_disable(wgrp); } } -- cgit v1.2.3 From ca2030d56bb7b73079f834b10de38ced9cac74a3 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 12 Jun 2020 17:00:59 +0200 Subject: gpu: host1x: Register child devices In order to remove the dependency on the simple-bus compatible string, which causes the OF driver core to register all child devices, make the host1x driver explicitly register its children. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 3c0f151847ba..d0ebb70e2fdd 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -470,8 +470,14 @@ static int host1x_probe(struct platform_device *pdev) if (err < 0) goto deinit_debugfs; + err = devm_of_platform_populate(&pdev->dev); + if (err < 0) + goto unregister; + return 0; +unregister: + host1x_unregister(host); deinit_debugfs: host1x_debug_deinit(host); host1x_intr_deinit(host); -- cgit v1.2.3 From a101e3dad8a90a840ccb45e492cd2ef7c01199ea Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 12 Jun 2020 17:01:00 +0200 Subject: drm/tegra: hub: Register child devices In order to remove the dependency on the simple-bus compatible string, which causes the OF driver core to register all child devices, make the display-hub driver explicitly register the display controller children. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/hub.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index a2ef8f218d4e..22a03f7ffdc1 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -948,6 +948,15 @@ static int tegra_display_hub_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to register host1x client: %d\n", err); + err = devm_of_platform_populate(&pdev->dev); + if (err < 0) + goto unregister; + + return err; + +unregister: + host1x_client_unregister(&hub->client); + pm_runtime_disable(&pdev->dev); return err; } -- cgit v1.2.3 From d9a0a05bf8c76e6dc79230669a8b5d685b168c30 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 8 Apr 2020 19:38:02 +0200 Subject: gpu: host1x: Detach driver on unregister Currently when a host1x device driver is unregistered, it is not detached from the host1x controller, which means that the device will stay around and when the driver is registered again, it may bind to the old, stale device rather than the new one that was created from scratch upon driver registration. This in turn can cause various weird crashes within the driver core because it is confronted with a device that was already deleted. Fix this by detaching the driver from the host1x controller when it is unregistered. This ensures that the deleted device also is no longer present in the device list that drivers will bind to. Reported-by: Sowjanya Komatineni Signed-off-by: Thierry Reding Tested-by: Sowjanya Komatineni Signed-off-by: Thierry Reding --- drivers/gpu/host1x/bus.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 6a995db51d6d..e201f62d62c0 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -686,8 +686,17 @@ EXPORT_SYMBOL(host1x_driver_register_full); */ void host1x_driver_unregister(struct host1x_driver *driver) { + struct host1x *host1x; + driver_unregister(&driver->driver); + mutex_lock(&devices_lock); + + list_for_each_entry(host1x, &devices, list) + host1x_detach_driver(host1x, driver); + + mutex_unlock(&devices_lock); + mutex_lock(&drivers_lock); list_del_init(&driver->list); mutex_unlock(&drivers_lock); -- cgit v1.2.3 From fce3a51d9b31312aa12ecb72ffabfc4c7b40bdc6 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 10 Jun 2020 13:30:59 +0200 Subject: drm/tegra: Add zpos property for cursor planes As of commit 4dc55525b095 ("drm: plane: Verify that no or all planes have a zpos property") a warning is emitted if there's a mix of planes with and without a zpos property. On Tegra, cursor planes are always composited on top of all other planes, which is why they never had a zpos property attached to them. However, since the composition order is fixed, this is trivial to remedy by simply attaching an immutable zpos property to them. v3: do not hardcode zpos for overlay planes used as cursor (Dmitry) v2: hardcode cursor plane zpos to 255 instead of 0 (Ville) Reported-by: Jonathan Hunter Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 83f31c6e891c..04d6848d19fc 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -957,6 +957,7 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm, } drm_plane_helper_add(&plane->base, &tegra_cursor_plane_helper_funcs); + drm_plane_create_zpos_immutable_property(&plane->base, 255); return &plane->base; } -- cgit v1.2.3 From b984b6d8b52372b98cce0a6ff6c2787f50665b87 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 14 Jun 2020 00:30:26 +0200 Subject: drm: mcde: Fix display initialization problem The following bug appeared in the MCDE driver/display initialization during the recent merge window. First the place we call drm_fbdev_generic_setup() in the wrong place: this needs to be called AFTER calling drm_dev_register() else we get this splat: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at ../drivers/gpu/drm/drm_fb_helper.c:2198 drm_fbdev_generic_setup+0x164/0x1a8 mcde a0350000.mcde: Device has not been registered. Modules linked in: Hardware name: ST-Ericsson Ux5x0 platform (Device Tree Support) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x9c/0xb0) [] (dump_stack) from [] (__warn+0xb8/0xd0) [] (__warn) from [] (warn_slowpath_fmt+0x74/0xb8) [] (warn_slowpath_fmt) from [] (drm_fbdev_generic_setup+0x164/0x1a8) [] (drm_fbdev_generic_setup) from [] (mcde_drm_bind+0xc4/0x160) [] (mcde_drm_bind) from [] (try_to_bring_up_master+0x15c/0x1a4) (...) Signed-off-by: Linus Walleij Reviewed-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200613223027.4189309-1-linus.walleij@linaro.org --- drivers/gpu/drm/mcde/mcde_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mcde/mcde_drv.c b/drivers/gpu/drm/mcde/mcde_drv.c index 84f3e2dbd77b..80082d6dce3a 100644 --- a/drivers/gpu/drm/mcde/mcde_drv.c +++ b/drivers/gpu/drm/mcde/mcde_drv.c @@ -209,7 +209,6 @@ static int mcde_modeset_init(struct drm_device *drm) drm_mode_config_reset(drm); drm_kms_helper_poll_init(drm); - drm_fbdev_generic_setup(drm, 32); return 0; } @@ -264,6 +263,8 @@ static int mcde_drm_bind(struct device *dev) if (ret < 0) goto unbind; + drm_fbdev_generic_setup(drm, 32); + return 0; unbind: -- cgit v1.2.3 From b48fd568ac649f0fd750e89487eaa4a0ef8d2835 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 14 Jun 2020 00:30:27 +0200 Subject: drm: mcde: Fix forgotten user of drm->dev_private Unable to handle kernel NULL pointer dereference at virtual address 00000918 pgd = (ptrval) [00000918] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.7.0-15001-gfa384b50b96b-dirty #514 Hardware name: ST-Ericsson Ux5x0 platform (Device Tree Support) PC is at mcde_display_enable+0x78/0x7c0 LR is at mcde_display_enable+0x78/0x7c0 Fix this by using to_mcde() as in other functions. Fixes: fd7ee85cfe7b ("drm/mcde: Don't use drm_device->dev_private") Signed-off-by: Linus Walleij Reviewed-by: Sam Ravnborg Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200613223027.4189309-2-linus.walleij@linaro.org --- drivers/gpu/drm/mcde/mcde_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mcde/mcde_display.c b/drivers/gpu/drm/mcde/mcde_display.c index 04e1d38d41f7..08802e5177f6 100644 --- a/drivers/gpu/drm/mcde/mcde_display.c +++ b/drivers/gpu/drm/mcde/mcde_display.c @@ -812,7 +812,7 @@ static void mcde_display_enable(struct drm_simple_display_pipe *pipe, struct drm_crtc *crtc = &pipe->crtc; struct drm_plane *plane = &pipe->plane; struct drm_device *drm = crtc->dev; - struct mcde *mcde = drm->dev_private; + struct mcde *mcde = to_mcde(drm); const struct drm_display_mode *mode = &cstate->mode; struct drm_framebuffer *fb = plane->state->fb; u32 format = fb->format->format; -- cgit v1.2.3 From 2de60af4a4633be12e5559ed2d3c27613f715d53 Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Mon, 1 Jun 2020 11:04:57 +0800 Subject: drm/i915/gvt: Add one missing MMIO handler for D_SKL_PLUS _PLANE_CTL_3_A, _PLANE_CTL_3_B and _PLANE_SURF_3_A are handled, but miss _PLANE_SURF_3_B. Reviewed-by: Zhenyu Wang Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200601030457.14002-1-colin.xu@intel.com --- drivers/gpu/drm/i915/gvt/handlers.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 3e88e3b5c43a..89873a7b7f32 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -3055,6 +3055,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_MMIO(0x72380), D_SKL_PLUS); MMIO_D(_MMIO(0x7239c), D_SKL_PLUS); MMIO_D(_MMIO(_PLANE_SURF_3_A), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_SURF_3_B), D_SKL_PLUS); MMIO_D(CSR_SSP_BASE, D_SKL_PLUS); MMIO_D(CSR_HTP_SKL, D_SKL_PLUS); -- cgit v1.2.3 From fccd0f7cf4d532674d727c7f204f038456675dee Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Mon, 1 Jun 2020 11:06:38 +0800 Subject: drm/i915/gvt: Fix two CFL MMIO handling caused by regression. D_CFL was incorrectly removed for: GAMT_CHKN_BIT_REG GEN9_CTX_PREEMPT_REG V2: Update commit message. V3: Rebase and split Fixes and mis-handled MMIO. Fixes: 43226e6fe798 (drm/i915/gvt: replaced register address with name) Reviewed-by: Zhenyu Wang Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200601030638.16002-1-colin.xu@intel.com --- drivers/gpu/drm/i915/gvt/handlers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 89873a7b7f32..f39a6b20bbaf 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -3132,8 +3132,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GEN9_WM_CHICKEN3, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(GAMT_CHKN_BIT_REG, D_KBL); - MMIO_D(GEN9_CTX_PREEMPT_REG, D_KBL | D_SKL); + MMIO_D(GAMT_CHKN_BIT_REG, D_KBL | D_CFL); + MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS); return 0; } -- cgit v1.2.3 From fc1e3aa0337c76b620726e205372073c234616ab Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Mon, 1 Jun 2020 11:07:21 +0800 Subject: drm/i915/gvt: Fix incorrect check of enabled bits in mask registers Using _MASKED_BIT_ENABLE macro to set mask register bits is straight forward and not likely to go wrong. However when checking which bit(s) is(are) enabled, simply bitwise AND value and _MASKED_BIT_ENABLE() won't output expected result. Suppose the register write is disabling bit 1 by setting 0xFFFF0000, however "& _MASKED_BIT_ENABLE(1)" outputs 0x00010000, and the non-zero check will pass which cause the old code consider the new value set as an enabling operation. We found guest set 0x80008000 on boot, and set 0xffff8000 during resume. Both are legal settings but old code will block latter and force vgpu enter fail-safe mode. Introduce two new macro and make proper masked bit check in mmio handler: IS_MASKED_BITS_ENABLED() IS_MASKED_BITS_DISABLED() V2: Rebase. Reviewed-by: Zhenyu Wang Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200601030721.17129-1-colin.xu@intel.com --- drivers/gpu/drm/i915/gvt/handlers.c | 19 ++++++++++--------- drivers/gpu/drm/i915/gvt/mmio_context.h | 6 +++--- drivers/gpu/drm/i915/gvt/reg.h | 5 +++++ 3 files changed, 18 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index f39a6b20bbaf..fadd2adb8030 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1726,13 +1726,13 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, (*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(2); write_vreg(vgpu, offset, p_data, bytes); - if (data & _MASKED_BIT_ENABLE(1)) { + if (IS_MASKED_BITS_ENABLED(data, 1)) { enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); return 0; } if (IS_COFFEELAKE(vgpu->gvt->gt->i915) && - data & _MASKED_BIT_ENABLE(2)) { + IS_MASKED_BITS_ENABLED(data, 2)) { enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); return 0; } @@ -1741,14 +1741,14 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, * pvinfo, if not, we will treat this guest as non-gvtg-aware * guest, and stop emulating its cfg space, mmio, gtt, etc. */ - if (((data & _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)) || - (data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE))) - && !vgpu->pv_notified) { + if ((IS_MASKED_BITS_ENABLED(data, GFX_PPGTT_ENABLE) || + IS_MASKED_BITS_ENABLED(data, GFX_RUN_LIST_ENABLE)) && + !vgpu->pv_notified) { enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); return 0; } - if ((data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)) - || (data & _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE))) { + if (IS_MASKED_BITS_ENABLED(data, GFX_RUN_LIST_ENABLE) || + IS_MASKED_BITS_DISABLED(data, GFX_RUN_LIST_ENABLE)) { enable_execlist = !!(data & GFX_RUN_LIST_ENABLE); gvt_dbg_core("EXECLIST %s on ring %s\n", @@ -1809,7 +1809,7 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, write_vreg(vgpu, offset, p_data, bytes); data = vgpu_vreg(vgpu, offset); - if (data & _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET)) + if (IS_MASKED_BITS_ENABLED(data, RESET_CTL_REQUEST_RESET)) data |= RESET_CTL_READY_TO_RESET; else if (data & _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)) data &= ~RESET_CTL_READY_TO_RESET; @@ -1827,7 +1827,8 @@ static int csfe_chicken1_mmio_write(struct intel_vgpu *vgpu, (*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(0x18); write_vreg(vgpu, offset, p_data, bytes); - if (data & _MASKED_BIT_ENABLE(0x10) || data & _MASKED_BIT_ENABLE(0x8)) + if (IS_MASKED_BITS_ENABLED(data, 0x10) || + IS_MASKED_BITS_ENABLED(data, 0x8)) enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); return 0; diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h index 970704b18f23..3b25e7fe32f6 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.h +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h @@ -54,8 +54,8 @@ bool is_inhibit_context(struct intel_context *ce); int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu, struct i915_request *req); -#define IS_RESTORE_INHIBIT(a) \ - (_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) == \ - ((a) & _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT))) + +#define IS_RESTORE_INHIBIT(a) \ + IS_MASKED_BITS_ENABLED(a, CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) #endif diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h index 5b66e14c5b7b..b88e033cbed4 100644 --- a/drivers/gpu/drm/i915/gvt/reg.h +++ b/drivers/gpu/drm/i915/gvt/reg.h @@ -94,6 +94,11 @@ #define GFX_MODE_BIT_SET_IN_MASK(val, bit) \ ((((bit) & 0xffff0000) == 0) && !!((val) & (((bit) << 16)))) +#define IS_MASKED_BITS_ENABLED(_val, _b) \ + (((_val) & _MASKED_BIT_ENABLE(_b)) == _MASKED_BIT_ENABLE(_b)) +#define IS_MASKED_BITS_DISABLED(_val, _b) \ + ((_val) & _MASKED_BIT_DISABLE(_b)) + #define FORCEWAKE_RENDER_GEN9_REG 0xa278 #define FORCEWAKE_ACK_RENDER_GEN9_REG 0x0D84 #define FORCEWAKE_BLITTER_GEN9_REG 0xa188 -- cgit v1.2.3 From a291e4fba259a56a6a274c1989997acb6f0bb03a Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Mon, 1 Jun 2020 11:55:56 +0800 Subject: drm/i915/gvt: Use GFP_ATOMIC instead of GFP_KERNEL in atomic context GFP_KERNEL flag specifies a normal kernel allocation in which executing in process context without any locks and can sleep. mmio_diff takes sometime to finish all the diff compare and it has locks, continue using GFP_KERNEL will output below trace if LOCKDEP enabled. Use GFP_ATOMIC instead. V2: Rebase. ===================================================== WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected 5.7.0-rc2 #400 Not tainted ----------------------------------------------------- is trying to acquire: ffffffffb47bea20 (fs_reclaim){+.+.}-{0:0}, at: fs_reclaim_acquire.part.0+0x0/0x30 and this task is already holding: ffff88845b85cc90 (&gvt->scheduler.mmio_context_lock){+.-.}-{2:2}, at: vgpu_mmio_diff_show+0xcf/0x2e0 which would create a new lock dependency: (&gvt->scheduler.mmio_context_lock){+.-.}-{2:2} -> (fs_reclaim){+.+.}-{0:0} but this new dependency connects a SOFTIRQ-irq-safe lock: (&gvt->scheduler.mmio_context_lock){+.-.}-{2:2} ... which became SOFTIRQ-irq-safe at: lock_acquire+0x175/0x4e0 _raw_spin_lock_irqsave+0x2b/0x40 shadow_context_status_change+0xfe/0x2f0 notifier_call_chain+0x6a/0xa0 __atomic_notifier_call_chain+0x5f/0xf0 execlists_schedule_out+0x42a/0x820 process_csb+0xe7/0x3e0 execlists_submission_tasklet+0x5c/0x1d0 tasklet_action_common.isra.0+0xeb/0x260 __do_softirq+0x11d/0x56f irq_exit+0xf6/0x100 do_IRQ+0x7f/0x160 ret_from_intr+0x0/0x2a cpuidle_enter_state+0xcd/0x5b0 cpuidle_enter+0x37/0x60 do_idle+0x337/0x3f0 cpu_startup_entry+0x14/0x20 start_kernel+0x58b/0x5c5 secondary_startup_64+0xa4/0xb0 to a SOFTIRQ-irq-unsafe lock: (fs_reclaim){+.+.}-{0:0} ... which became SOFTIRQ-irq-unsafe at: ... lock_acquire+0x175/0x4e0 fs_reclaim_acquire.part.0+0x20/0x30 kmem_cache_alloc_node_trace+0x2e/0x290 alloc_worker+0x2b/0xb0 init_rescuer.part.0+0x17/0xe0 workqueue_init+0x293/0x3bb kernel_init_freeable+0x149/0x325 kernel_init+0x8/0x116 ret_from_fork+0x3a/0x50 other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); local_irq_disable(); lock(&gvt->scheduler.mmio_context_lock); lock(fs_reclaim); lock(&gvt->scheduler.mmio_context_lock); *** DEADLOCK *** 3 locks held by cat/1439: #0: ffff888444a23698 (&p->lock){+.+.}-{3:3}, at: seq_read+0x49/0x680 #1: ffff88845b858068 (&gvt->lock){+.+.}-{3:3}, at: vgpu_mmio_diff_show+0xc7/0x2e0 #2: ffff88845b85cc90 (&gvt->scheduler.mmio_context_lock){+.-.}-{2:2}, at: vgpu_mmio_diff_show+0xcf/0x2e0 the dependencies between SOFTIRQ-irq-safe lock and the holding lock: -> (&gvt->scheduler.mmio_context_lock){+.-.}-{2:2} ops: 31 { HARDIRQ-ON-W at: lock_acquire+0x175/0x4e0 _raw_spin_lock_bh+0x2f/0x40 vgpu_mmio_diff_show+0xcf/0x2e0 seq_read+0x242/0x680 full_proxy_read+0x95/0xc0 vfs_read+0xc2/0x1b0 ksys_read+0xc4/0x160 do_syscall_64+0x63/0x290 entry_SYSCALL_64_after_hwframe+0x49/0xb3 IN-SOFTIRQ-W at: lock_acquire+0x175/0x4e0 _raw_spin_lock_irqsave+0x2b/0x40 shadow_context_status_change+0xfe/0x2f0 notifier_call_chain+0x6a/0xa0 __atomic_notifier_call_chain+0x5f/0xf0 execlists_schedule_out+0x42a/0x820 process_csb+0xe7/0x3e0 execlists_submission_tasklet+0x5c/0x1d0 tasklet_action_common.isra.0+0xeb/0x260 __do_softirq+0x11d/0x56f irq_exit+0xf6/0x100 do_IRQ+0x7f/0x160 ret_from_intr+0x0/0x2a cpuidle_enter_state+0xcd/0x5b0 cpuidle_enter+0x37/0x60 do_idle+0x337/0x3f0 cpu_startup_entry+0x14/0x20 start_kernel+0x58b/0x5c5 secondary_startup_64+0xa4/0xb0 INITIAL USE at: lock_acquire+0x175/0x4e0 _raw_spin_lock_irqsave+0x2b/0x40 shadow_context_status_change+0xfe/0x2f0 notifier_call_chain+0x6a/0xa0 __atomic_notifier_call_chain+0x5f/0xf0 execlists_schedule_in+0x2c8/0x690 __execlists_submission_tasklet+0x1303/0x1930 execlists_submit_request+0x1e7/0x230 submit_notify+0x105/0x2a4 __i915_sw_fence_complete+0xaa/0x380 __engine_park+0x313/0x5a0 ____intel_wakeref_put_last+0x3e/0x90 intel_gt_resume+0x41e/0x440 intel_gt_init+0x283/0xbc0 i915_gem_init+0x197/0x240 i915_driver_probe+0xc2d/0x12e0 i915_pci_probe+0xa2/0x1e0 local_pci_probe+0x6f/0xb0 pci_device_probe+0x171/0x230 really_probe+0x17a/0x380 driver_probe_device+0x70/0xf0 device_driver_attach+0x82/0x90 __driver_attach+0x60/0x100 bus_for_each_dev+0xe4/0x140 bus_add_driver+0x257/0x2a0 driver_register+0xd3/0x150 i915_init+0x6d/0x80 do_one_initcall+0xb8/0x3a0 kernel_init_freeable+0x2b4/0x325 kernel_init+0x8/0x116 ret_from_fork+0x3a/0x50 } __key.77812+0x0/0x40 ... acquired at: lock_acquire+0x175/0x4e0 fs_reclaim_acquire.part.0+0x20/0x30 kmem_cache_alloc_trace+0x2e/0x260 mmio_diff_handler+0xc0/0x150 intel_gvt_for_each_tracked_mmio+0x7b/0x140 vgpu_mmio_diff_show+0x111/0x2e0 seq_read+0x242/0x680 full_proxy_read+0x95/0xc0 vfs_read+0xc2/0x1b0 ksys_read+0xc4/0x160 do_syscall_64+0x63/0x290 entry_SYSCALL_64_after_hwframe+0x49/0xb3 the dependencies between the lock to be acquired and SOFTIRQ-irq-unsafe lock: -> (fs_reclaim){+.+.}-{0:0} ops: 1999031 { HARDIRQ-ON-W at: lock_acquire+0x175/0x4e0 fs_reclaim_acquire.part.0+0x20/0x30 kmem_cache_alloc_node_trace+0x2e/0x290 alloc_worker+0x2b/0xb0 init_rescuer.part.0+0x17/0xe0 workqueue_init+0x293/0x3bb kernel_init_freeable+0x149/0x325 kernel_init+0x8/0x116 ret_from_fork+0x3a/0x50 SOFTIRQ-ON-W at: lock_acquire+0x175/0x4e0 fs_reclaim_acquire.part.0+0x20/0x30 kmem_cache_alloc_node_trace+0x2e/0x290 alloc_worker+0x2b/0xb0 init_rescuer.part.0+0x17/0xe0 workqueue_init+0x293/0x3bb kernel_init_freeable+0x149/0x325 kernel_init+0x8/0x116 ret_from_fork+0x3a/0x50 INITIAL USE at: lock_acquire+0x175/0x4e0 fs_reclaim_acquire.part.0+0x20/0x30 kmem_cache_alloc_node_trace+0x2e/0x290 alloc_worker+0x2b/0xb0 init_rescuer.part.0+0x17/0xe0 workqueue_init+0x293/0x3bb kernel_init_freeable+0x149/0x325 kernel_init+0x8/0x116 ret_from_fork+0x3a/0x50 } __fs_reclaim_map+0x0/0x60 ... acquired at: lock_acquire+0x175/0x4e0 fs_reclaim_acquire.part.0+0x20/0x30 kmem_cache_alloc_trace+0x2e/0x260 mmio_diff_handler+0xc0/0x150 intel_gvt_for_each_tracked_mmio+0x7b/0x140 vgpu_mmio_diff_show+0x111/0x2e0 seq_read+0x242/0x680 full_proxy_read+0x95/0xc0 vfs_read+0xc2/0x1b0 ksys_read+0xc4/0x160 do_syscall_64+0x63/0x290 entry_SYSCALL_64_after_hwframe+0x49/0xb3 stack backtrace: CPU: 5 PID: 1439 Comm: cat Not tainted 5.7.0-rc2 #400 Hardware name: Intel(R) Client Systems NUC8i7BEH/NUC8BEB, BIOS BECFL357.86A.0056.2018.1128.1717 11/28/2018 Call Trace: dump_stack+0x97/0xe0 check_irq_usage.cold+0x428/0x434 ? check_usage_forwards+0x2c0/0x2c0 ? class_equal+0x11/0x20 ? __bfs+0xd2/0x2d0 ? in_any_class_list+0xa0/0xa0 ? check_path+0x22/0x40 ? check_noncircular+0x150/0x2b0 ? print_circular_bug.isra.0+0x1b0/0x1b0 ? mark_lock+0x13d/0xc50 ? __lock_acquire+0x1e32/0x39b0 __lock_acquire+0x1e32/0x39b0 ? timerqueue_add+0xc1/0x130 ? register_lock_class+0xa60/0xa60 ? mark_lock+0x13d/0xc50 lock_acquire+0x175/0x4e0 ? __zone_pcp_update+0x80/0x80 ? check_flags.part.0+0x210/0x210 ? mark_held_locks+0x65/0x90 ? _raw_spin_unlock_irqrestore+0x32/0x40 ? lockdep_hardirqs_on+0x190/0x290 ? fwtable_read32+0x163/0x480 ? mmio_diff_handler+0xc0/0x150 fs_reclaim_acquire.part.0+0x20/0x30 ? __zone_pcp_update+0x80/0x80 kmem_cache_alloc_trace+0x2e/0x260 mmio_diff_handler+0xc0/0x150 ? vgpu_mmio_diff_open+0x30/0x30 intel_gvt_for_each_tracked_mmio+0x7b/0x140 vgpu_mmio_diff_show+0x111/0x2e0 ? mmio_diff_handler+0x150/0x150 ? rcu_read_lock_sched_held+0xa0/0xb0 ? rcu_read_lock_bh_held+0xc0/0xc0 ? kasan_unpoison_shadow+0x33/0x40 ? __kasan_kmalloc.constprop.0+0xc2/0xd0 seq_read+0x242/0x680 ? debugfs_locked_down.isra.0+0x70/0x70 full_proxy_read+0x95/0xc0 vfs_read+0xc2/0x1b0 ksys_read+0xc4/0x160 ? kernel_write+0xb0/0xb0 ? mark_held_locks+0x24/0x90 do_syscall_64+0x63/0x290 entry_SYSCALL_64_after_hwframe+0x49/0xb3 RIP: 0033:0x7ffbe3e6efb2 Code: c0 e9 c2 fe ff ff 50 48 8d 3d ca cb 0a 00 e8 f5 19 02 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 RSP: 002b:00007ffd021c08a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007ffbe3e6efb2 RDX: 0000000000020000 RSI: 00007ffbe34cd000 RDI: 0000000000000003 RBP: 00007ffbe34cd000 R08: 00007ffbe34cc010 R09: 0000000000000000 R10: 0000000000000022 R11: 0000000000000246 R12: 0000562b6f0a11f0 R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 ------------[ cut here ]------------ Acked-by: Zhenyu Wang Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200601035556.19999-1-colin.xu@intel.com --- drivers/gpu/drm/i915/gvt/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index ec47d4114554..62e6a14ad58e 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -66,7 +66,7 @@ static inline int mmio_diff_handler(struct intel_gvt *gvt, vreg = vgpu_vreg(param->vgpu, offset); if (preg != vreg) { - node = kmalloc(sizeof(*node), GFP_KERNEL); + node = kmalloc(sizeof(*node), GFP_ATOMIC); if (!node) return -ENOMEM; -- cgit v1.2.3 From 99c7b309472787026ce52fd2bc5d00630567a872 Mon Sep 17 00:00:00 2001 From: Lorenz Brun Date: Thu, 11 Jun 2020 22:11:21 +0200 Subject: drm/amdkfd: Use correct major in devcgroup check The existing code used the major version number of the DRM driver instead of the device major number of the DRM subsystem for validating access for a devices cgroup. This meant that accesses allowed by the devices cgroup weren't permitted and certain accesses denied by the devices cgroup were permitted (if they matched the wrong major device number). Signed-off-by: Lorenz Brun Fixes: 6b855f7b83d2f ("drm/amdkfd: Check against device cgroup") Reviewed-off-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index cde5e4c7caa1..06c5fdb1e818 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -1076,7 +1077,7 @@ static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd) #if defined(CONFIG_CGROUP_DEVICE) struct drm_device *ddev = kfd->ddev; - return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major, + return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, ddev->render->index, DEVCG_ACC_WRITE | DEVCG_ACC_READ); #else -- cgit v1.2.3 From 7386f5c9c8080525985780ed8c3d1475258a4e34 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 15 Jun 2020 14:29:55 -0400 Subject: drm/amdgpu/pm: update comment to clarify Overdrive interfaces Vega10 and previous asics use one interface, vega20 and newer use another. Reviewed-by: Evan Quan Acked-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 775e389c9a13..b3d33307ff09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -696,7 +696,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, * default power levels, write "r" (reset) to the file to reset them. * * - * < For Vega20 > + * < For Vega20 and newer ASICs > * * Reading the file will display: * -- cgit v1.2.3 From da9cebe16930f0273278fe893f2809450c61ae41 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 15 Jun 2020 16:36:49 -0400 Subject: drm/amdgpu: fix documentation around busy_percentage Add rename the gpu busy percentage for consistency and add the mem busy percentage documentation. Reviewed-by: Evan Quan Reviewed-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index b3d33307ff09..16596a9ccabe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1668,7 +1668,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, } /** - * DOC: busy_percent + * DOC: gpu_busy_percent * * The amdgpu driver provides a sysfs API for reading how busy the GPU * is as a percentage. The file gpu_busy_percent is used for this. -- cgit v1.2.3 From f167989c673096c8d823b1292c6ee51ed266d178 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 17 Jun 2020 13:53:09 -0700 Subject: drm/msm: Fix address space size after refactor. Previously the address space went from 16M to ~0u, but with the refactor one of the 'f's was dropped, limiting us to 256MB. Additionally, the new interface takes a start and size, not start and end, so we can't just copy and paste. Fixes regressions in dEQP-VK.memory.allocation.random.* Fixes: ccac7ce373c1 ("drm/msm: Refactor address space initialization") Signed-off-by: Eric Anholt Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 3d4efe684a98..5db06b590943 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -194,7 +194,7 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu, struct msm_gem_address_space *aspace; aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M, - 0xffffffff); + 0xffffffff - SZ_16M); if (IS_ERR(aspace) && !IS_ERR(mmu)) mmu->funcs->destroy(mmu); -- cgit v1.2.3 From 078e8f8ce8ad96fdcb5ca4dc8714d266a77eefc3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 17 Jun 2020 13:53:10 -0700 Subject: drm/msm: Fix setup of a6xx create_address_space. We don't want it under CONFIG_DRM_MSM_GPU_STATE, we need it all the time (like the other GPUs do). Fixes: ccac7ce373c1 ("drm/msm: Refactor address space initialization") Signed-off-by: Eric Anholt Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index a1589e040c57..7768557cdfb2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -893,8 +893,8 @@ static const struct adreno_gpu_funcs funcs = { #if defined(CONFIG_DRM_MSM_GPU_STATE) .gpu_state_get = a6xx_gpu_state_get, .gpu_state_put = a6xx_gpu_state_put, - .create_address_space = adreno_iommu_create_address_space, #endif + .create_address_space = adreno_iommu_create_address_space, }, .get_timestamp = a6xx_get_timestamp, }; -- cgit v1.2.3 From f78d4032de60f50fd4afaa0fb68ea03b985f820a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Jun 2020 09:58:10 +0200 Subject: drm: encoder_slave: fix refcouting error for modules module_put() balances try_module_get(), not request_module(). Fix the error path to match that. Fixes: 2066facca4c7 ("drm/kms: slave encoder interface.") Signed-off-by: Wolfram Sang Reviewed-by: Emil Velikov Acked-by: Daniel Vetter Signed-off-by: Wolfram Sang --- drivers/gpu/drm/drm_encoder_slave.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_encoder_slave.c b/drivers/gpu/drm/drm_encoder_slave.c index cf804389f5ec..d50a7884e69e 100644 --- a/drivers/gpu/drm/drm_encoder_slave.c +++ b/drivers/gpu/drm/drm_encoder_slave.c @@ -84,7 +84,7 @@ int drm_i2c_encoder_init(struct drm_device *dev, err = encoder_drv->encoder_init(client, dev, encoder); if (err) - goto fail_unregister; + goto fail_module_put; if (info->platform_data) encoder->slave_funcs->set_config(&encoder->base, @@ -92,9 +92,10 @@ int drm_i2c_encoder_init(struct drm_device *dev, return 0; +fail_module_put: + module_put(module); fail_unregister: i2c_unregister_device(client); - module_put(module); fail: return err; } -- cgit v1.2.3 From bb7d93fff62f32f4ff0072b2ace695bd5f5137e7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Jun 2020 09:58:11 +0200 Subject: drm: encoder_slave: use new I2C API i2c_new_client() is deprecated, use the replacement i2c_new_client_device(). Also, we have a helper to check if a driver is bound. Use it to simplify the code. Note that this changes the errno for a failed device creation from ENOMEM to ENODEV. No callers currently interpret this errno, though, so we use this condensed error check. Signed-off-by: Wolfram Sang Reviewed-by: Emil Velikov Acked-by: Daniel Vetter Signed-off-by: Wolfram Sang --- drivers/gpu/drm/drm_encoder_slave.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_encoder_slave.c b/drivers/gpu/drm/drm_encoder_slave.c index d50a7884e69e..e464429d32df 100644 --- a/drivers/gpu/drm/drm_encoder_slave.c +++ b/drivers/gpu/drm/drm_encoder_slave.c @@ -61,13 +61,8 @@ int drm_i2c_encoder_init(struct drm_device *dev, request_module("%s%s", I2C_MODULE_PREFIX, info->type); - client = i2c_new_device(adap, info); - if (!client) { - err = -ENOMEM; - goto fail; - } - - if (!client->dev.driver) { + client = i2c_new_client_device(adap, info); + if (!i2c_client_has_driver(client)) { err = -ENODEV; goto fail_unregister; } @@ -96,7 +91,6 @@ fail_module_put: module_put(module); fail_unregister: i2c_unregister_device(client); -fail: return err; } EXPORT_SYMBOL(drm_i2c_encoder_init); -- cgit v1.2.3 From 5f9af404eec82981c4345c9943be48422234e7ab Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Mon, 18 May 2020 22:16:46 +0200 Subject: drm: rcar-du: Fix build error Select DRM_KMS_HELPER dependency. Build error when DRM_KMS_HELPER is not selected: drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xd48): undefined reference to `drm_atomic_helper_bridge_duplicate_state' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xd50): undefined reference to `drm_atomic_helper_bridge_destroy_state' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xd70): undefined reference to `drm_atomic_helper_bridge_reset' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xdc8): undefined reference to `drm_atomic_helper_connector_reset' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xde0): undefined reference to `drm_helper_probe_single_connector_modes' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xe08): undefined reference to `drm_atomic_helper_connector_duplicate_state' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xe10): undefined reference to `drm_atomic_helper_connector_destroy_state' Fixes: c6a27fa41fab ("drm: rcar-du: Convert LVDS encoder code to bridge driver") Cc: Signed-off-by: Daniel Gomez Reviewed-by: Emil Velikov Reviewed-by: Kieran Bingham Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart --- drivers/gpu/drm/rcar-du/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig index 0919f1f159a4..f65d1489dc50 100644 --- a/drivers/gpu/drm/rcar-du/Kconfig +++ b/drivers/gpu/drm/rcar-du/Kconfig @@ -31,6 +31,7 @@ config DRM_RCAR_DW_HDMI config DRM_RCAR_LVDS tristate "R-Car DU LVDS Encoder Support" depends on DRM && DRM_BRIDGE && OF + select DRM_KMS_HELPER select DRM_PANEL select OF_FLATTREE select OF_OVERLAY -- cgit v1.2.3 From efb94790852ae673b18efde1b171d284689ff333 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Mon, 15 Jun 2020 08:19:34 -0500 Subject: drm/panel-simple: fix connector type for LogicPD Type28 Display The LogicPD Type28 display used by several Logic PD products has not worked since v5.6. The connector type for the LogicPD Type 28 display is missing and drm_panel_bridge_add() requires connector type to be set. Signed-off-by: Adam Ford Fixes: 0d35408afbeb ("drm/panel: simple: Add Logic PD Type 28 display support") Cc: Adam Ford Cc: Sam Ravnborg Cc: Thierry Reding Cc: dri-devel@lists.freedesktop.org Cc: # v5.6+ Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200615131934.12440-1-aford173@gmail.com --- drivers/gpu/drm/panel/panel-simple.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index b6ecd1552132..334e9de5b2c8 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2495,6 +2495,7 @@ static const struct panel_desc logicpd_type_28 = { .bus_format = MEDIA_BUS_FMT_RGB888_1X24, .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE | DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE, + .connector_type = DRM_MODE_CONNECTOR_DPI, }; static const struct panel_desc mitsubishi_aa070mc01 = { -- cgit v1.2.3 From 8a4f5e1185db61bce6ce3a5dce6381a77bcf94e6 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 9 Jun 2020 13:28:09 +0300 Subject: drm/panel-simple: fix connector type for newhaven_nhd_43_480272ef_atxl Add connector type for newhaven_nhd_43_480272ef_atxl, as drm_panel_bridge_add() requires connector type to be set. Signed-off-by: Tomi Valkeinen Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200609102809.753203-1-tomi.valkeinen@ti.com --- drivers/gpu/drm/panel/panel-simple.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 334e9de5b2c8..5178f87d6574 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2664,6 +2664,7 @@ static const struct panel_desc newhaven_nhd_43_480272ef_atxl = { .bus_format = MEDIA_BUS_FMT_RGB888_1X24, .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE | DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE, + .connector_type = DRM_MODE_CONNECTOR_DPI, }; static const struct display_timing nlt_nl192108ac18_02d_timing = { -- cgit v1.2.3 From 30480e6ed508e3ff7a3e03c975696aa5196ffe8a Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 22 Jun 2020 10:55:38 -0600 Subject: drm/msm: Fix up the rest of the messed up address sizes msm_gem_address_space_create() changed to take a start/length instead of a start/end for the iova space but all of the callers were just cut and pasted from the old usage. Most of the mistakes have been fixed up so just catch up the rest. Fixes: ccac7ce373c1 ("drm/msm: Refactor address space initialization") Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 2 +- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 2 +- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 2 +- drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 2 +- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c index 60f6472a3e58..6021f8d9efd1 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c @@ -408,7 +408,7 @@ a2xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) struct msm_gem_address_space *aspace; aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M, - SZ_16M + 0xfff * SZ_64K); + 0xfff * SZ_64K); if (IS_ERR(aspace) && !IS_ERR(mmu)) mmu->funcs->destroy(mmu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 096be97ce9f9..21e77d67151f 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -1121,7 +1121,7 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu) return -ENODEV; mmu = msm_iommu_new(gmu->dev, domain); - gmu->aspace = msm_gem_address_space_create(mmu, "gmu", 0x0, 0x7fffffff); + gmu->aspace = msm_gem_address_space_create(mmu, "gmu", 0x0, 0x80000000); if (IS_ERR(gmu->aspace)) { iommu_domain_free(domain); return PTR_ERR(gmu->aspace); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index b8615d4fe8a3..680527e28d09 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -780,7 +780,7 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) mmu = msm_iommu_new(dpu_kms->dev->dev, domain); aspace = msm_gem_address_space_create(mmu, "dpu1", - 0x1000, 0xfffffff); + 0x1000, 0x100000000 - 0x1000); if (IS_ERR(aspace)) { mmu->funcs->destroy(mmu); diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 08897184b1d9..fc6a3f8134c7 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -514,7 +514,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) config->iommu); aspace = msm_gem_address_space_create(mmu, - "mdp4", 0x1000, 0xffffffff); + "mdp4", 0x1000, 0x100000000 - 0x1000); if (IS_ERR(aspace)) { if (!IS_ERR(mmu)) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 54631fbd9389..8586d2cf1d94 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -641,7 +641,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) mmu = msm_iommu_new(iommu_dev, config->platform.iommu); aspace = msm_gem_address_space_create(mmu, "mdp5", - 0x1000, 0xffffffff); + 0x1000, 0x100000000 - 0x1000); if (IS_ERR(aspace)) { if (!IS_ERR(mmu)) -- cgit v1.2.3 From 6c22bc18a3b93a38018844636557ad02e588e055 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 31 May 2020 11:30:24 +0200 Subject: drm: panel-orientation-quirks: Add quirk for Asus T101HA panel Like the Asus T100HA the Asus T101HA also uses a panel which has been mounted 90 degrees rotated, albeit in the opposite direction. Add a quirk for this. Reviewed-by: Emil Velikov Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20200531093025.28050-1-hdegoede@redhat.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index ffd95bfeaa94..d11d83703931 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -121,6 +121,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100HAN"), }, .driver_data = (void *)&asus_t100ha, + }, { /* Asus T101HA */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T101HA"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* GPD MicroPC (generic strings, also match on bios date) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"), -- cgit v1.2.3 From a05caf9e62a85d12da27e814ac13195f4683f21c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 31 May 2020 11:30:25 +0200 Subject: drm: panel-orientation-quirks: Use generic orientation-data for Acer S1003 The Acer S1003 has proper DMI strings for sys-vendor and product-name, so we do not need to match by BIOS-date. This means that the Acer S1003 can use the generic lcd800x1280_rightside_up drm_dmi_panel_orientation_data struct which is also used by other quirks. Reviewed-by: Emil Velikov Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20200531093025.28050-2-hdegoede@redhat.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index d11d83703931..d00ea384dcbf 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -30,12 +30,6 @@ struct drm_dmi_panel_orientation_data { int orientation; }; -static const struct drm_dmi_panel_orientation_data acer_s1003 = { - .width = 800, - .height = 1280, - .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, -}; - static const struct drm_dmi_panel_orientation_data asus_t100ha = { .width = 800, .height = 1280, @@ -114,7 +108,7 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"), DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "One S1003"), }, - .driver_data = (void *)&acer_s1003, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* Asus T100HA */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), -- cgit v1.2.3 From 842ec61f4006a6477a9deaedd69131e9f46e4cb5 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 13 May 2020 16:07:24 +0200 Subject: drm/sun4i: mixer: Call of_dma_configure if there's an IOMMU The main DRM device is actually a virtual device so it doesn't have the iommus property, which is instead on the DMA masters, in this case the mixers. Add a call to of_dma_configure with the mixers DT node but on the DRM virtual device to configure it in the same way than the mixers. Reviewed-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/9a4daf438dd3f2fe07afb23688bfb793a0613d7d.1589378833.git-series.maxime@cerno.tech (cherry picked from commit b718102dbdfd0285ad559687a30e27cc9124e592) [Maxime: Applied to -fixes since it missed the merge window and display is broken without it] Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun8i_mixer.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.c b/drivers/gpu/drm/sun4i/sun8i_mixer.c index 56cc037fd312..cc4fb916318f 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.c +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.c @@ -363,6 +363,19 @@ static int sun8i_mixer_bind(struct device *dev, struct device *master, mixer->engine.ops = &sun8i_engine_ops; mixer->engine.node = dev->of_node; + if (of_find_property(dev->of_node, "iommus", NULL)) { + /* + * This assume we have the same DMA constraints for + * all our the mixers in our pipeline. This sounds + * bad, but it has always been the case for us, and + * DRM doesn't do per-device allocation either, so we + * would need to fix DRM first... + */ + ret = of_dma_configure(drm->dev, dev->of_node, true); + if (ret) + return ret; + } + /* * While this function can fail, we shouldn't do anything * if this happens. Some early DE2 DT entries don't provide -- cgit v1.2.3 From ee434a4f9f5ea15b0f84bddd8c012838cf9472c5 Mon Sep 17 00:00:00 2001 From: John van der Kamp Date: Tue, 23 Jun 2020 23:30:54 +0200 Subject: drm/amdgpu/display: Unlock mutex on error Make sure we pass through ret label to unlock the mutex. Signed-off-by: John van der Kamp Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index dcf84a61de37..949d10ef8304 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -510,8 +510,10 @@ static ssize_t srm_data_read(struct file *filp, struct kobject *kobj, struct bin srm = psp_get_srm(work->hdcp.config.psp.handle, &srm_version, &srm_size); - if (!srm) - return -EINVAL; + if (!srm) { + ret = -EINVAL; + goto ret; + } if (pos >= srm_size) ret = 0; -- cgit v1.2.3 From 35f760b44b1b9cb16a306bdcc7220fbbf78c4789 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Mon, 22 Jun 2020 23:31:22 +0300 Subject: drm/radeon: fix fb_div check in ni_init_smc_spll_table() clk_s is checked twice in a row in ni_init_smc_spll_table(). fb_div should be checked instead. Fixes: 69e0b57a91ad ("drm/radeon/kms: add dpm support for cayman (v5)") Cc: stable@vger.kernel.org Signed-off-by: Denis Efremov Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ni_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index b57c37ddd164..c7fbb7932f37 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -2127,7 +2127,7 @@ static int ni_init_smc_spll_table(struct radeon_device *rdev) if (clk_s & ~(SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_SHIFT)) ret = -EINVAL; - if (clk_s & ~(SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_SHIFT)) + if (fb_div & ~(SMC_NISLANDS_SPLL_DIV_TABLE_FBDIV_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_FBDIV_SHIFT)) ret = -EINVAL; if (clk_v & ~(SMC_NISLANDS_SPLL_DIV_TABLE_CLKV_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_CLKV_SHIFT)) -- cgit v1.2.3 From dc5bdb68b5b369d5bc7d1de96fa64cc1737a6320 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 24 Jun 2020 11:29:10 +0200 Subject: drm/fb-helper: Fix vt restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past we had a pile of hacks to orchestrate access between fbdev emulation and native kms clients. We've tried to streamline this, by always preferring the kms side above fbdev calls when a drm master exists, because drm master controls access to the display resources. Unfortunately this breaks existing userspace, specifically Xorg. When exiting Xorg first restores the console to text mode using the KDSET ioctl on the vt. This does nothing, because a drm master is still around. Then it drops the drm master status, which again does nothing, because logind is keeping additional drm fd open to be able to orchestrate vt switches. In the past this is the point where fbdev was restored, as part of the ->lastclose hook on the drm side. Now to fix this regression we don't want to go back to letting fbdev restore things whenever it feels like, or to the pile of hacks we've had before. Instead try and go with a minimal exception to make the KDSET case work again, and nothing else. This means that if userspace does a KDSET call when switching between graphical compositors, there will be some flickering with fbcon showing up for a bit. But a) that's not a regression and b) userspace can fix it by improving the vt switching dance - logind should have all the information it needs. While pondering all this I'm also wondering wheter we should have a SWITCH_MASTER ioctl to allow race-free master status handover. But that's for another day. v2: Somehow forgot to cc all the fbdev people. v3: Fix typo Alex spotted. Reviewed-by: Alex Deucher Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208179 Cc: shlomo@fastmail.com Reported-and-Tested-by: shlomo@fastmail.com Cc: Michel Dänzer Fixes: 64914da24ea9 ("drm/fbdev-helper: don't force restores") Cc: Noralf Trønnes Cc: Thomas Zimmermann Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v5.7+ Cc: Bartlomiej Zolnierkiewicz Cc: Geert Uytterhoeven Cc: Nathan Chancellor Cc: Qiujun Huang Cc: Peter Rosin Cc: linux-fbdev@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200624092910.3280448-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_fb_helper.c | 63 ++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 170aa7689110..5609e164805f 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -227,18 +227,9 @@ int drm_fb_helper_debug_leave(struct fb_info *info) } EXPORT_SYMBOL(drm_fb_helper_debug_leave); -/** - * drm_fb_helper_restore_fbdev_mode_unlocked - restore fbdev configuration - * @fb_helper: driver-allocated fbdev helper, can be NULL - * - * This should be called from driver's drm &drm_driver.lastclose callback - * when implementing an fbcon on top of kms using this helper. This ensures that - * the user isn't greeted with a black screen when e.g. X dies. - * - * RETURNS: - * Zero if everything went ok, negative error code otherwise. - */ -int drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper) +static int +__drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper, + bool force) { bool do_delayed; int ret; @@ -250,7 +241,16 @@ int drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper) return 0; mutex_lock(&fb_helper->lock); - ret = drm_client_modeset_commit(&fb_helper->client); + if (force) { + /* + * Yes this is the _locked version which expects the master lock + * to be held. But for forced restores we're intentionally + * racing here, see drm_fb_helper_set_par(). + */ + ret = drm_client_modeset_commit_locked(&fb_helper->client); + } else { + ret = drm_client_modeset_commit(&fb_helper->client); + } do_delayed = fb_helper->delayed_hotplug; if (do_delayed) @@ -262,6 +262,22 @@ int drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper) return ret; } + +/** + * drm_fb_helper_restore_fbdev_mode_unlocked - restore fbdev configuration + * @fb_helper: driver-allocated fbdev helper, can be NULL + * + * This should be called from driver's drm &drm_driver.lastclose callback + * when implementing an fbcon on top of kms using this helper. This ensures that + * the user isn't greeted with a black screen when e.g. X dies. + * + * RETURNS: + * Zero if everything went ok, negative error code otherwise. + */ +int drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper) +{ + return __drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper, false); +} EXPORT_SYMBOL(drm_fb_helper_restore_fbdev_mode_unlocked); #ifdef CONFIG_MAGIC_SYSRQ @@ -1318,6 +1334,7 @@ int drm_fb_helper_set_par(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct fb_var_screeninfo *var = &info->var; + bool force; if (oops_in_progress) return -EBUSY; @@ -1327,7 +1344,25 @@ int drm_fb_helper_set_par(struct fb_info *info) return -EINVAL; } - drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper); + /* + * Normally we want to make sure that a kms master takes precedence over + * fbdev, to avoid fbdev flickering and occasionally stealing the + * display status. But Xorg first sets the vt back to text mode using + * the KDSET IOCTL with KD_TEXT, and only after that drops the master + * status when exiting. + * + * In the past this was caught by drm_fb_helper_lastclose(), but on + * modern systems where logind always keeps a drm fd open to orchestrate + * the vt switching, this doesn't work. + * + * To not break the userspace ABI we have this special case here, which + * is only used for the above case. Everything else uses the normal + * commit function, which ensures that we never steal the display from + * an active drm master. + */ + force = var->activate & FB_ACTIVATE_KD_TEXT; + + __drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper, force); return 0; } -- cgit v1.2.3 From edfaf6fa73f15568d4337f208b2333f647c35810 Mon Sep 17 00:00:00 2001 From: Wenhui Sheng Date: Thu, 18 Jun 2020 15:37:04 +0800 Subject: drm/amdgpu: add fw release for sdma v5_0 sdma fw isn't released when module exit Reviewed-by: Hawking Zhang Signed-off-by: Wenhui Sheng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b544baf306f2..5d71c23e2640 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1298,8 +1298,12 @@ static int sdma_v5_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - for (i = 0; i < adev->sdma.num_instances; i++) + for (i = 0; i < adev->sdma.num_instances; i++) { + if (adev->sdma.instance[i].fw != NULL) + release_firmware(adev->sdma.instance[i].fw); + amdgpu_ring_fini(&adev->sdma.instance[i].ring); + } return 0; } -- cgit v1.2.3 From 5ae9c378c3d88b40af72f8e8f961808e29f3e70b Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Mon, 1 Jun 2020 16:12:09 +0800 Subject: drm/amd/display: Enable output_bpc property on all outputs [Why] Connector property output_bpc is available on DP/eDP only. New IGT tests would benifit if this property works on HDMI. [How] Enable this read-only property on all types of connectors. Signed-off-by: Stylon Wang Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 076af267b488..1d692f4f42f3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1058,7 +1058,6 @@ static const struct { {"link_settings", &dp_link_settings_debugfs_fops}, {"phy_settings", &dp_phy_settings_debugfs_fop}, {"test_pattern", &dp_phy_test_pattern_fops}, - {"output_bpc", &output_bpc_fops}, {"vrr_range", &vrr_range_fops}, #ifdef CONFIG_DRM_AMD_DC_HDCP {"hdcp_sink_capability", &hdcp_sink_capability_fops}, @@ -1142,6 +1141,9 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector) debugfs_create_file_unsafe("force_yuv420_output", 0644, dir, connector, &force_yuv420_output_fops); + debugfs_create_file("output_bpc", 0644, dir, connector, + &output_bpc_fops); + connector->debugfs_dpcd_address = 0; connector->debugfs_dpcd_size = 0; -- cgit v1.2.3 From fa7041d9d2fc7401cece43f305eb5b87b7017fc4 Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Fri, 12 Jun 2020 19:04:18 +0800 Subject: drm/amd/display: Fix ineffective setting of max bpc property [Why] Regression was introduced where setting max bpc property has no effect on the atomic check and final commit. It has the same effect as max bpc being stuck at 8. [How] Correctly propagate max bpc with the new connector state. Signed-off-by: Stylon Wang Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7ced9f87be97..10ac8076d4f2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5024,7 +5024,8 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, struct drm_connector *connector = &aconnector->base; struct amdgpu_device *adev = connector->dev->dev_private; struct dc_stream_state *stream; - int requested_bpc = connector->state ? connector->state->max_requested_bpc : 8; + const struct drm_connector_state *drm_state = dm_state ? &dm_state->base : NULL; + int requested_bpc = drm_state ? drm_state->max_requested_bpc : 8; enum dc_status dc_result = DC_OK; do { -- cgit v1.2.3 From b5b78a6c8d8cb9c307bc6b16a754603424459d6e Mon Sep 17 00:00:00 2001 From: Bernard Zhao Date: Sat, 20 Jun 2020 17:11:52 +0800 Subject: drm/amd: fix potential memleak in err branch The function kobject_init_and_add alloc memory like: kobject_init_and_add->kobject_add_varg->kobject_set_name_vargs ->kvasprintf_const->kstrdup_const->kstrdup->kmalloc_track_caller ->kmalloc_slab, in err branch this memory not free. If use kmemleak, this path maybe catched. These changes are to add kobject_put in kobject_init_and_add failed branch, fix potential memleak. Signed-off-by: Bernard Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d27221ddcdeb..0e0c42e9f6a3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -428,6 +428,7 @@ struct kfd_process *kfd_create_process(struct file *filep) (int)process->lead_thread->pid); if (ret) { pr_warn("Creating procfs pid directory failed"); + kobject_put(process->kobj); goto out; } -- cgit v1.2.3 From 7e89e4aaa9ae83107d059c186955484b3aa6eb23 Mon Sep 17 00:00:00 2001 From: Ivan Mironov Date: Thu, 25 Jun 2020 21:50:42 +0500 Subject: drm/amd/powerplay: Fix NULL dereference in lock_bus() on Vega20 w/o RAS I updated my system with Radeon VII from kernel 5.6 to kernel 5.7, and following started to happen on each boot: ... BUG: kernel NULL pointer dereference, address: 0000000000000128 ... CPU: 9 PID: 1940 Comm: modprobe Tainted: G E 5.7.2-200.im0.fc32.x86_64 #1 Hardware name: System manufacturer System Product Name/PRIME X570-P, BIOS 1407 04/02/2020 RIP: 0010:lock_bus+0x42/0x60 [amdgpu] ... Call Trace: i2c_smbus_xfer+0x3d/0xf0 i2c_default_probe+0xf3/0x130 i2c_detect.isra.0+0xfe/0x2b0 ? kfree+0xa3/0x200 ? kobject_uevent_env+0x11f/0x6a0 ? i2c_detect.isra.0+0x2b0/0x2b0 __process_new_driver+0x1b/0x20 bus_for_each_dev+0x64/0x90 ? 0xffffffffc0f34000 i2c_register_driver+0x73/0xc0 do_one_initcall+0x46/0x200 ? _cond_resched+0x16/0x40 ? kmem_cache_alloc_trace+0x167/0x220 ? do_init_module+0x23/0x260 do_init_module+0x5c/0x260 __do_sys_init_module+0x14f/0x170 do_syscall_64+0x5b/0xf0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ... Error appears when some i2c device driver tries to probe for devices using adapter registered by `smu_v11_0_i2c_eeprom_control_init()`. Code supporting this adapter requires `adev->psp.ras.ras` to be not NULL, which is true only when `amdgpu_ras_init()` detects HW support by calling `amdgpu_ras_check_supported()`. Before 9015d60c9ee1, adapter was registered by -> amdgpu_device_ip_init() -> amdgpu_ras_recovery_init() -> amdgpu_ras_eeprom_init() -> smu_v11_0_i2c_eeprom_control_init() after verifying that `adev->psp.ras.ras` is not NULL in `amdgpu_ras_recovery_init()`. Currently it is registered unconditionally by -> amdgpu_device_ip_init() -> pp_sw_init() -> hwmgr_sw_init() -> vega20_smu_init() -> smu_v11_0_i2c_eeprom_control_init() Fix simply adds HW support check (ras == NULL => no support) before calling `smu_v11_0_i2c_eeprom_control_{init,fini}()`. Please note that there is a chance that similar fix is also required for CHIP_ARCTURUS. I do not know whether any actual Arcturus hardware without RAS exist, and whether calling `smu_i2c_eeprom_init()` makes any sense when there is no HW support. Cc: stable@vger.kernel.org Fixes: 9015d60c9ee1 ("drm/amdgpu: Move EEPROM I2C adapter to amdgpu_device") Signed-off-by: Ivan Mironov Tested-by: Bjorn Nostvold Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c index 2fb97554134f..c2e0fbbccf56 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c @@ -522,9 +522,11 @@ static int vega20_smu_init(struct pp_hwmgr *hwmgr) priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].version = 0x01; priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].size = sizeof(DpmActivityMonitorCoeffInt_t); - ret = smu_v11_0_i2c_eeprom_control_init(&adev->pm.smu_i2c); - if (ret) - goto err4; + if (adev->psp.ras.ras) { + ret = smu_v11_0_i2c_eeprom_control_init(&adev->pm.smu_i2c); + if (ret) + goto err4; + } return 0; @@ -560,7 +562,8 @@ static int vega20_smu_fini(struct pp_hwmgr *hwmgr) (struct vega20_smumgr *)(hwmgr->smu_backend); struct amdgpu_device *adev = hwmgr->adev; - smu_v11_0_i2c_eeprom_control_fini(&adev->pm.smu_i2c); + if (adev->psp.ras.ras) + smu_v11_0_i2c_eeprom_control_fini(&adev->pm.smu_i2c); if (priv) { amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_PPTABLE].handle, -- cgit v1.2.3 From aee83fbdbfa112c72fef3b91cd38ae93841d1075 Mon Sep 17 00:00:00 2001 From: Tamseel Shams Date: Thu, 21 May 2020 19:52:10 +0530 Subject: drm/exynos: Remove dev_err() on platform_get_irq() failure platform_get_irq() will call dev_err() itself on failure, so there is no need for the driver to also do this. This is detected by coccinelle. Signed-off-by: Tamseel Shams Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index fcee33a43aca..03be31427181 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -1498,7 +1498,6 @@ static int g2d_probe(struct platform_device *pdev) g2d->irq = platform_get_irq(pdev, 0); if (g2d->irq < 0) { - dev_err(dev, "failed to get irq\n"); ret = g2d->irq; goto err_put_clk; } -- cgit v1.2.3 From b9c633882de4601015625f9136f248e9abca8a7a Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 1 Jun 2020 17:06:30 +0900 Subject: drm/exynos: Properly propagate return value in drm_iommu_attach_device() Propagate the proper error codes from the called functions instead of unconditionally returning 0. Reported-by: kbuild test robot Signed-off-by: Marek Szyprowski Merge conflict so merged it manually. Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index 619f81435c1b..58b89ec11b0e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -61,7 +61,7 @@ static int drm_iommu_attach_device(struct drm_device *drm_dev, struct device *subdrv_dev, void **dma_priv) { struct exynos_drm_private *priv = drm_dev->dev_private; - int ret; + int ret = 0; if (get_dma_ops(priv->dma_dev) != get_dma_ops(subdrv_dev)) { DRM_DEV_ERROR(subdrv_dev, "Device %s lacks support for IOMMU\n", @@ -92,7 +92,7 @@ static int drm_iommu_attach_device(struct drm_device *drm_dev, if (ret) clear_dma_max_seg_size(subdrv_dev); - return 0; + return ret; } /* -- cgit v1.2.3 From d4f5a095daf0d25f0b385e1ef26338608433a4c5 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Mon, 15 Jun 2020 00:49:28 -0500 Subject: drm/exynos: fix ref count leak in mic_pre_enable in mic_pre_enable, pm_runtime_get_sync is called which increments the counter even in case of failure, leading to incorrect ref count. In case of failure, decrement the ref count before returning. Signed-off-by: Navid Emamdoost Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_mic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c index a86abc173605..3821ea76a703 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_mic.c +++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c @@ -269,8 +269,10 @@ static void mic_pre_enable(struct drm_bridge *bridge) goto unlock; ret = pm_runtime_get_sync(mic->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(mic->dev); goto unlock; + } mic_set_path(mic, 1); -- cgit v1.2.3 From 55fd7e0222ea01246ef3e6aae28b5721fdfb790f Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 10 Jun 2020 13:18:07 -0700 Subject: drm/i915: Include asm sources for {ivb, hsw}_clear_kernel.c Alexandre Oliva has recently removed these files from Linux Libre with concerns that the sources weren't available. The sources are available on IGT repository, and only open source tools are used to generate the {ivb,hsw}_clear_kernel.c files. However, the remaining concern from Alexandre Oliva was around GPL license and the source not been present when distributing the code. So, it looks like 2 alternatives are possible, the use of linux-firmware.git repository to store the blob or making sure that the source is also present in our tree. Since the goal is to limit the i915 firmware to only the micro-controller blobs let's make sure that we do include the asm sources here in our tree. Btw, I tried to have some diligence here and make sure that the asms that these commits are adding are truly the source for the mentioned files: igt$ ./scripts/generate_clear_kernel.sh -g ivb \ -m ~/mesa/build/src/intel/tools/i965_asm Output file not specified - using default file "ivb-cb_assembled" Generating gen7 CB Kernel assembled file "ivb_clear_kernel.c" for i915 driver... igt$ diff ~/i915/drm-tip/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c \ ivb_clear_kernel.c < * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:29:32 AM UTC > * Generated by: IGT Gpu Tools on Mon 08 Jun 2020 10:00:54 AM PDT 61c61 < }; > }; \ No newline at end of file igt$ ./scripts/generate_clear_kernel.sh -g hsw \ -m ~/mesa/build/src/intel/tools/i965_asm Output file not specified - using default file "hsw-cb_assembled" Generating gen7.5 CB Kernel assembled file "hsw_clear_kernel.c" for i915 driver... igt$ diff ~/i915/drm-tip/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c \ hsw_clear_kernel.c 5c5 < * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:30:13 AM UTC > * Generated by: IGT Gpu Tools on Mon 08 Jun 2020 10:01:42 AM PDT 61c61 < }; > }; \ No newline at end of file Used IGT and Mesa master repositories from Fri Jun 5 2020) IGT: 53e8c878a6fb ("tests/kms_chamelium: Force reprobe after replugging the connector") Mesa: 5d13c7477eb1 ("radv: set keep_statistic_info with RADV_DEBUG=shaderstats") Mesa built with: meson build -D platforms=drm,x11 -D dri-drivers=i965 \ -D gallium-drivers=iris -D prefix=/usr \ -D libdir=/usr/lib64/ -Dtools=intel \ -Dkulkan-drivers=intel && ninja -C build v2: Header clean-up and include build instructions in a readme (Chris) Modified commit message to respect check-patch Reference: http://www.fsfla.org/pipermail/linux-libre/2020-June/003374.html Reference: http://www.fsfla.org/pipermail/linux-libre/2020-June/003375.html Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Cc: # v5.7+ Cc: Alexandre Oliva Cc: Prathap Kumar Valsan Cc: Akeem G Abodunrin Cc: Mika Kuoppala Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Signed-off-by: Rodrigo Vivi Reviewed-by: Jon Bloomfield Link: https://patchwork.freedesktop.org/patch/msgid/20200610201807.191440-1-rodrigo.vivi@intel.com (cherry picked from commit 5a7eeb8ba143d860050ecea924a8f074f02d8023) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/shaders/README | 46 ++++++++ .../gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm | 119 +++++++++++++++++++++ .../gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm | 117 ++++++++++++++++++++ 3 files changed, 282 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/shaders/README create mode 100644 drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm create mode 100644 drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/shaders/README b/drivers/gpu/drm/i915/gt/shaders/README new file mode 100644 index 000000000000..e7e96d7073c7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shaders/README @@ -0,0 +1,46 @@ +ASM sources for auto generated shaders +====================================== + +The i915/gt/hsw_clear_kernel.c and i915/gt/ivb_clear_kernel.c files contain +pre-compiled batch chunks that will clear any residual render cache during +context switch. + +They are generated from their respective platform ASM files present on +i915/gt/shaders/clear_kernel directory. + +The generated .c files should never be modified directly. Instead, any modification +needs to be done on the on their respective ASM files and build instructions below +needes to be followed. + +Building +======== + +Environment +----------- + +IGT GPU tool scripts and the Mesa's i965 instruction assembler tool are used +on building. + +Please make sure your Mesa tool is compiled with "-Dtools=intel" and +"-Ddri-drivers=i965", and run this script from IGT source root directory" + +The instructions bellow assume: + * IGT gpu tools source code is located on your home directory (~) as ~/igt + * Mesa source code is located on your home directory (~) as ~/mesa + and built under the ~/mesa/build directory + * Linux kernel source code is under your home directory (~) as ~/linux + +Instructions +------------ + +~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm \ + ~/igt/lib/i915/shaders/clear_kernel/ivb.asm +~ $ cd ~/igt +igt $ ./scripts/generate_clear_kernel.sh -g ivb \ + -m ~/mesa/build/src/intel/tools/i965_asm + +~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm \ + ~/igt/lib/i915/shaders/clear_kernel/hsw.asm +~ $ cd ~/igt +igt $ ./scripts/generate_clear_kernel.sh -g hsw \ + -m ~/mesa/build/src/intel/tools/i965_asm \ No newline at end of file diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm new file mode 100644 index 000000000000..5fdf384bb621 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +/* + * Kernel for PAVP buffer clear. + * + * 1. Clear all 64 GRF registers assigned to the kernel with designated value; + * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears + * 512 bytes of Render Cache. + */ + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/** + * Curbe Format + * + * DW 1.0 - Block Offset to write Render Cache + * DW 1.1 [15:0] - Clear Word + * DW 1.2 - Delay iterations + * DW 1.3 - Enable Instrumentation (only for debug) + * DW 1.4 - Rsvd (intended for context ID) + * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount + * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count) + * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count) + * + * Binding Table + * + * BTI 0: 2D Surface to help clear L3 (Render/Data Cache) + * BTI 1: Wait/Instrumentation Buffer + * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT) + * Expected to be initialized to 0 by driver/another kernel + * Layout: + * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS] + * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N + */ +add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */ +cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N }; +(+f0.0) jmpi(1) 352D { align1 WE_all 1N }; + +/** + * State Register has info on where this thread is running + * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + */ +mov(8) g3<1>UD 0x00000000UD { align1 1Q }; +shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N }; +and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */ +shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N }; +and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */ +mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N }; +add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */ +shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N }; +and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N }; /* g3.2 = EUID */ +mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N }; +add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */ + +mov(8) g5<1>UD 0x00000000UD { align1 1Q }; +and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N }; +mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N }; + +mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */ +mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */ +and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N }; + +/* Media block read to fetch current value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x02190001 + + render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q }; +add(1) g5<1>D g5<0,1,0>D 1D { align1 1N }; + +/* Media block write for updated value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q }; + +/* Delay thread for specified parameter */ +add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N }; +(+f0.0) jmpi(1) -32D { align1 WE_all 1N }; + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/* Initialize looping parameters */ +mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */ +mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. Each loop contains 16 GRF's */ + +/* Write 32x16 all "0" block */ +mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q }; +mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q }; +mov(2) g2<1>UD g1<2,2,1>UW { align1 1N }; +mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */ +and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N }; +mov(16) g3<1>UD 0x00000000UD { align1 1H }; +mov(16) g4<1>UD 0x00000000UD { align1 1H }; +mov(16) g5<1>UD 0x00000000UD { align1 1H }; +mov(16) g6<1>UD 0x00000000UD { align1 1H }; +mov(16) g7<1>UD 0x00000000UD { align1 1H }; +mov(16) g8<1>UD 0x00000000UD { align1 1H }; +mov(16) g9<1>UD 0x00000000UD { align1 1H }; +mov(16) g10<1>UD 0x00000000UD { align1 1H }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; +add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; + +/* Now, clear all GRF registers */ +add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N }; +mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H }; +add(1) a0<1>D a0<0,1,0>D 32D { align1 1N }; +(+f0.0) jmpi(1) -64D { align1 WE_all 1N }; + +/* Terminante the thread */ +sendc(8) null<1>UD g127<8,8,1>F 0x82000010 + thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT }; diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm new file mode 100644 index 000000000000..97c7ac9e3854 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +/* + * Kernel for PAVP buffer clear. + * + * 1. Clear all 64 GRF registers assigned to the kernel with designated value; + * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears + * 512 bytes of Render Cache. + */ + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/** + * Curbe Format + * + * DW 1.0 - Block Offset to write Render Cache + * DW 1.1 [15:0] - Clear Word + * DW 1.2 - Delay iterations + * DW 1.3 - Enable Instrumentation (only for debug) + * DW 1.4 - Rsvd (intended for context ID) + * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount + * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count) + * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count) + * + * Binding Table + * + * BTI 0: 2D Surface to help clear L3 (Render/Data Cache) + * BTI 1: Wait/Instrumentation Buffer + * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT) + * Expected to be initialized to 0 by driver/another kernel + * Layout : + * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS] + * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N + */ +add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */ +cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N }; +(+f0.0) jmpi(1) 44D { align1 WE_all 1N }; + +/** + * State Register has info on where this thread is running + * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + */ +mov(8) g3<1>UD 0x00000000UD { align1 1Q }; +shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N }; +and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */ +shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N }; +and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */ +mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N }; +add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */ +shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N }; +and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N }; /* g3.2 = EUID */ +mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N }; +add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */ + +mov(8) g5<1>UD 0x00000000UD { align1 1Q }; +and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N }; +mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N }; + +mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */ +mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */ +and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N }; + +/* Media block read to fetch current value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x02190001 + render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q }; +add(1) g5<1>D g5<0,1,0>D 1D { align1 1N }; + +/* Media block write for updated value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q }; +/* Delay thread for specified parameter */ +add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N }; +(+f0.0) jmpi(1) -4D { align1 WE_all 1N }; + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/* Initialize looping parameters */ +mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */ +mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. Each loop contains 16 GRF's */ + +/* Write 32x16 all "0" block */ +mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q }; +mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q }; +mov(2) g2<1>UD g1<2,2,1>UW { align1 1N }; +mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */ +and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N }; +mov(16) g3<1>UD 0x00000000UD { align1 1H }; +mov(16) g4<1>UD 0x00000000UD { align1 1H }; +mov(16) g5<1>UD 0x00000000UD { align1 1H }; +mov(16) g6<1>UD 0x00000000UD { align1 1H }; +mov(16) g7<1>UD 0x00000000UD { align1 1H }; +mov(16) g8<1>UD 0x00000000UD { align1 1H }; +mov(16) g9<1>UD 0x00000000UD { align1 1H }; +mov(16) g10<1>UD 0x00000000UD { align1 1H }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; +add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; + +/* Now, clear all GRF registers */ +add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N }; +mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H }; +add(1) a0<1>D a0<0,1,0>D 32D { align1 1N }; +(+f0.0) jmpi(1) -8D { align1 WE_all 1N }; + +/* Terminante the thread */ +sendc(8) null<1>UD g127<8,8,1>F 0x82000010 + thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT }; -- cgit v1.2.3 From bda8eaa6dee7525f4dac950810a85a88bf6c2ba0 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 29 Jun 2020 14:00:32 +0800 Subject: drm: sun4i: hdmi: Remove extra HPD polling The HPD sense mechanism in Allwinner's old HDMI encoder hardware is more or less an input-only GPIO. Other GPIO-based HPD implementations directly return the current state, instead of polling for a specific state and returning the other if that times out. Remove the I/O polling from sun4i_hdmi_connector_detect() and directly return a known state based on the current reading. This also gets rid of excessive CPU usage by kworker as reported on Stack Exchange [1] and Armbian forums [2]. [1] https://superuser.com/questions/1515001/debian-10-buster-on-cubietruck-with-bug-in-sun4i-drm-hdmi [2] https://forum.armbian.com/topic/14282-headless-systems-and-sun4i_drm_hdmi-a10a20/ Fixes: 9c5681011a0c ("drm/sun4i: Add HDMI support") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200629060032.24134-1-wens@kernel.org --- drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index ce07ddc3e058..557cbe5ab35f 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -259,9 +259,8 @@ sun4i_hdmi_connector_detect(struct drm_connector *connector, bool force) struct sun4i_hdmi *hdmi = drm_connector_to_sun4i_hdmi(connector); unsigned long reg; - if (readl_poll_timeout(hdmi->base + SUN4I_HDMI_HPD_REG, reg, - reg & SUN4I_HDMI_HPD_HIGH, - 0, 500000)) { + reg = readl(hdmi->base + SUN4I_HDMI_HPD_REG); + if (reg & SUN4I_HDMI_HPD_HIGH) { cec_phys_addr_invalidate(hdmi->cec_adap); return connector_status_disconnected; } -- cgit v1.2.3 From 6eb3cf2e06d22b2b08e6b0ab48cb9c05a8e1a107 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 29 Jun 2020 13:03:52 -0400 Subject: drm/amd/display: Only revalidate bandwidth on medium and fast updates [Why] Changes that are fast don't require updating DLG parameters making this call unnecessary. Considering this is an expensive call it should not be done on every flip. DML touches clocks, p-state support, DLG params and a few other DC internal flags and these aren't expected during fast. A hang has been reported with this change when called on every flip which suggests that modifying these fields is not recommended behavior on fast updates. [How] Guard the validation to only happen if update type isn't FAST. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1191 Fixes: a24eaa5c51255b ("drm/amd/display: Revalidate bandwidth before commiting DC updates") Signed-off-by: Nicholas Kazlauskas Acked-by: Alex Deucher Reviewed-by: Roman Li Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 6f93a6ca4cf0..d016f50e187c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2538,10 +2538,12 @@ void dc_commit_updates_for_stream(struct dc *dc, copy_stream_update_to_stream(dc, context, stream, stream_update); - if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { - DC_ERROR("Mode validation failed for stream update!\n"); - dc_release_state(context); - return; + if (update_type > UPDATE_TYPE_FAST) { + if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { + DC_ERROR("Mode validation failed for stream update!\n"); + dc_release_state(context); + return; + } } commit_planes_for_stream( -- cgit v1.2.3 From d7a6634a4cfba073ff6a526cb4265d6e58ece234 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 25 Jun 2020 17:55:57 -0400 Subject: drm/amdgpu/atomfirmware: fix vram_info fetching for renoir Renoir uses integrated_system_info table v12. The table has the same layout as v11 with respect to this data. Just reuse the existing code for v12 for stable. Fixes incorrectly reported vram info in the driver output. Acked-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 58f9d8c3a17a..44f927641b89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -204,6 +204,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, (mode_info->atom_context->bios + data_offset); switch (crev) { case 11: + case 12: mem_channel_number = igp_info->v11.umachannelnumber; /* channel width is 64 */ if (vram_width) -- cgit v1.2.3 From beaf10efca64ac824240838ab1f054dfbefab5e6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 1 Jul 2020 12:00:08 -0400 Subject: drm/amdgpu: use %u rather than %d for sclk/mclk Large clock values may overflow and show up as negative. Reported by prOMiNd on IRC. Acked-by: Nirmoy Das Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 16596a9ccabe..02e6f8c4dde0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -2784,7 +2784,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%d\n", sclk * 10 * 1000); + return snprintf(buf, PAGE_SIZE, "%u\n", sclk * 10 * 1000); } static ssize_t amdgpu_hwmon_show_sclk_label(struct device *dev, @@ -2819,7 +2819,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%d\n", mclk * 10 * 1000); + return snprintf(buf, PAGE_SIZE, "%u\n", mclk * 10 * 1000); } static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, -- cgit v1.2.3