summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorKarthik Poosa <karthik.poosa@intel.com>2026-01-13 02:05:18 +0530
committerRodrigo Vivi <rodrigo.vivi@intel.com>2026-01-12 17:00:29 -0500
commitc332fba805d659eca1f8e3a41d259c03421e81f1 (patch)
tree510e42afa3edc60615e8d9d57654a7f245ba3ddd /drivers/gpu
parentb1dcec9bd8a1de5fe04ecb3831f8db0a5c059970 (diff)
drm/xe/hwmon: Expose temperature limits
Read temperature limits using pcode mailbox and expose shutdown temperature limit as tempX_emergency, critical temperature limit as tempX_crit and GPU max temperature limit as temp2_max. Update Xe hwmon documentation with above entries. v2: - Resolve a documentation warning. - Address below review comments from Raag. - Update date and kernel version in Xe hwmon documentation. - Remove explicit disable of has_mbx_thermal_info for unsupported platforms. - Remove unnecessary default case in switches. - Remove obvious comments. - Use TEMP_LIMIT_MAX to compute number of dwords needed in xe_hwmon_thermal_info. - Remove THERMAL_LIMITS_DWORDS macro. - Use has_mbx_thermal_info for checking thermal mailbox support. v3: - Address below minor comments. (Raag) - Group new temperature attributes with existing temperature attributes as per channel index in Xe hwmon documentation. - Rename enums of xe_temp_limit to improve clarity. - Use DIV_ROUND_UP to calculate dwords needed for temperature limits. - Use return instead of breaks in xe_hwmon_temp_read. - Minor aesthetic refinements. v4: - Remove a redundant break. (Raag) - Update drm_dbg to drm_warn to inform user of unavailability for thermal mailbox on expected platforms. Signed-off-by: Karthik Poosa <karthik.poosa@intel.com> Reviewed-by: Raag Jadav <raag.jadav@intel.com> Link: https://patch.msgid.link/20260112203521.1014388-2-karthik.poosa@intel.com Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c102
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c3
-rw-r--r--drivers/gpu/drm/xe/xe_pci_types.h1
-rw-r--r--drivers/gpu/drm/xe/xe_pcode_api.h3
5 files changed, 108 insertions, 3 deletions
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 4dab3057f58d..f689766adcb1 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -341,6 +341,8 @@ struct xe_device {
* pcode mailbox commands.
*/
u8 has_mbx_power_limits:1;
+ /** @info.has_mbx_thermal_info: Device supports thermal mailbox commands */
+ u8 has_mbx_thermal_info:1;
/** @info.has_mem_copy_instr: Device supports MEM_COPY instruction */
u8 has_mem_copy_instr:1;
/** @info.has_mert: Device has standalone MERT */
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index ff2aea52ef75..7eb6a76fa217 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -53,6 +53,15 @@ enum xe_fan_channel {
FAN_MAX,
};
+enum xe_temp_limit {
+ TEMP_LIMIT_PKG_SHUTDOWN,
+ TEMP_LIMIT_PKG_CRIT,
+ TEMP_LIMIT_MEM_SHUTDOWN,
+ TEMP_LIMIT_PKG_MAX,
+ TEMP_LIMIT_MEM_CRIT,
+ TEMP_LIMIT_MAX
+};
+
/* Attribute index for powerX_xxx_interval sysfs entries */
enum sensor_attr_power {
SENSOR_INDEX_PSYS_PL1,
@@ -112,6 +121,18 @@ struct xe_hwmon_fan_info {
};
/**
+ * struct xe_hwmon_thermal_info - to store temperature data
+ */
+struct xe_hwmon_thermal_info {
+ union {
+ /** @limit: temperatures limits */
+ u8 limit[TEMP_LIMIT_MAX];
+ /** @data: temperature limits in dwords */
+ u32 data[DIV_ROUND_UP(TEMP_LIMIT_MAX, sizeof(u32))];
+ };
+};
+
+/**
* struct xe_hwmon - xe hwmon data structure
*/
struct xe_hwmon {
@@ -137,7 +158,8 @@ struct xe_hwmon {
u32 pl1_on_boot[CHANNEL_MAX];
/** @pl2_on_boot: power limit PL2 on boot */
u32 pl2_on_boot[CHANNEL_MAX];
-
+ /** @temp: Temperature info */
+ struct xe_hwmon_thermal_info temp;
};
static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 attr, int channel,
@@ -677,8 +699,11 @@ static const struct attribute_group *hwmon_groups[] = {
};
static const struct hwmon_channel_info * const hwmon_info[] = {
- HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL,
- HWMON_T_INPUT | HWMON_T_LABEL),
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_LABEL,
+ HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL |
+ HWMON_T_MAX,
+ HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL),
HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT |
HWMON_P_CAP,
HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CAP),
@@ -689,6 +714,19 @@ static const struct hwmon_channel_info * const hwmon_info[] = {
NULL
};
+static int xe_hwmon_pcode_read_thermal_info(struct xe_hwmon *hwmon)
+{
+ struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
+ int ret;
+
+ ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_THERMAL_INFO, READ_THERMAL_LIMITS, 0),
+ &hwmon->temp.data[0], &hwmon->temp.data[1]);
+ drm_dbg(&hwmon->xe->drm, "thermal info read val 0x%x val1 0x%x\n",
+ hwmon->temp.data[0], hwmon->temp.data[1]);
+
+ return ret;
+}
+
/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */
static int xe_hwmon_pcode_read_i1(const struct xe_hwmon *hwmon, u32 *uval)
{
@@ -787,6 +825,31 @@ static umode_t
xe_hwmon_temp_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
{
switch (attr) {
+ case hwmon_temp_emergency:
+ switch (channel) {
+ case CHANNEL_PKG:
+ return hwmon->temp.limit[TEMP_LIMIT_PKG_SHUTDOWN] ? 0444 : 0;
+ case CHANNEL_VRAM:
+ return hwmon->temp.limit[TEMP_LIMIT_MEM_SHUTDOWN] ? 0444 : 0;
+ default:
+ return 0;
+ }
+ case hwmon_temp_crit:
+ switch (channel) {
+ case CHANNEL_PKG:
+ return hwmon->temp.limit[TEMP_LIMIT_PKG_CRIT] ? 0444 : 0;
+ case CHANNEL_VRAM:
+ return hwmon->temp.limit[TEMP_LIMIT_MEM_CRIT] ? 0444 : 0;
+ default:
+ return 0;
+ }
+ case hwmon_temp_max:
+ switch (channel) {
+ case CHANNEL_PKG:
+ return hwmon->temp.limit[TEMP_LIMIT_PKG_MAX] ? 0444 : 0;
+ default:
+ return 0;
+ }
case hwmon_temp_input:
case hwmon_temp_label:
return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_TEMP, channel)) ? 0444 : 0;
@@ -808,6 +871,36 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
/* HW register value is in degrees Celsius, convert to millidegrees. */
*val = REG_FIELD_GET(TEMP_MASK, reg_val) * MILLIDEGREE_PER_DEGREE;
return 0;
+ case hwmon_temp_emergency:
+ switch (channel) {
+ case CHANNEL_PKG:
+ *val = hwmon->temp.limit[TEMP_LIMIT_PKG_SHUTDOWN] * MILLIDEGREE_PER_DEGREE;
+ return 0;
+ case CHANNEL_VRAM:
+ *val = hwmon->temp.limit[TEMP_LIMIT_MEM_SHUTDOWN] * MILLIDEGREE_PER_DEGREE;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ case hwmon_temp_crit:
+ switch (channel) {
+ case CHANNEL_PKG:
+ *val = hwmon->temp.limit[TEMP_LIMIT_PKG_CRIT] * MILLIDEGREE_PER_DEGREE;
+ return 0;
+ case CHANNEL_VRAM:
+ *val = hwmon->temp.limit[TEMP_LIMIT_MEM_CRIT] * MILLIDEGREE_PER_DEGREE;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ case hwmon_temp_max:
+ switch (channel) {
+ case CHANNEL_PKG:
+ *val = hwmon->temp.limit[TEMP_LIMIT_PKG_MAX] * MILLIDEGREE_PER_DEGREE;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
default:
return -EOPNOTSUPP;
}
@@ -1263,6 +1356,9 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
for (channel = 0; channel < FAN_MAX; channel++)
if (xe_hwmon_is_visible(hwmon, hwmon_fan, hwmon_fan_input, channel))
xe_hwmon_fan_input_read(hwmon, channel, &fan_speed);
+
+ if (hwmon->xe->info.has_mbx_thermal_info && xe_hwmon_pcode_read_thermal_info(hwmon))
+ drm_warn(&hwmon->xe->drm, "Thermal mailbox not supported by card firmware\n");
}
int xe_hwmon_register(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 673db1e82ec3..fa0a374d88f8 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -366,6 +366,7 @@ static const struct xe_device_desc bmg_desc = {
.has_fan_control = true,
.has_flat_ccs = 1,
.has_mbx_power_limits = true,
+ .has_mbx_thermal_info = true,
.has_gsc_nvm = 1,
.has_heci_cscfi = 1,
.has_i2c = true,
@@ -422,6 +423,7 @@ static const struct xe_device_desc cri_desc = {
.has_gsc_nvm = 1,
.has_i2c = true,
.has_mbx_power_limits = true,
+ .has_mbx_thermal_info = true,
.has_mert = true,
.has_pre_prod_wa = 1,
.has_soc_remapper_sysctrl = true,
@@ -687,6 +689,7 @@ static int xe_info_init_early(struct xe_device *xe,
/* runtime fusing may force flat_ccs to disabled later */
xe->info.has_flat_ccs = desc->has_flat_ccs;
xe->info.has_mbx_power_limits = desc->has_mbx_power_limits;
+ xe->info.has_mbx_thermal_info = desc->has_mbx_thermal_info;
xe->info.has_gsc_nvm = desc->has_gsc_nvm;
xe->info.has_heci_gscfi = desc->has_heci_gscfi;
xe->info.has_heci_cscfi = desc->has_heci_cscfi;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index 5f20f56571d1..20acc5349ee6 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -48,6 +48,7 @@ struct xe_device_desc {
u8 has_late_bind:1;
u8 has_llc:1;
u8 has_mbx_power_limits:1;
+ u8 has_mbx_thermal_info:1;
u8 has_mem_copy_instr:1;
u8 has_mert:1;
u8 has_pre_prod_wa:1;
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index 975892d6b230..dc8f241e5b9e 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -50,6 +50,9 @@
#define READ_PL_FROM_FW 0x1
#define READ_PL_FROM_PCODE 0x0
+#define PCODE_THERMAL_INFO 0x25
+#define READ_THERMAL_LIMITS 0x0
+
#define PCODE_LATE_BINDING 0x5C
#define GET_CAPABILITY_STATUS 0x0
#define V1_FAN_SUPPORTED REG_BIT(0)