summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorKarthik Poosa <karthik.poosa@intel.com>2026-01-13 02:05:19 +0530
committerRodrigo Vivi <rodrigo.vivi@intel.com>2026-01-12 17:00:29 -0500
commit3a0cb885e111db34b22058a3b82e99e49f02ac94 (patch)
treeeba3f742036f133b0a928e817357f8e47532b0e0 /drivers/gpu
parentc332fba805d659eca1f8e3a41d259c03421e81f1 (diff)
drm/xe/hwmon: Expose memory controller temperature
Expose GPU memory controller average temperature and its limits under temp4_xxx. Update Xe hwmon documentation for this. v2: - Rephrase commit message. (Badal) - Update kernel version in Xe hwmon documentation. (Raag) v3: - Update kernel version in Xe hwmon documentation. - Address review comments from Raag. - Remove obvious comments. - Remove redundant debug logs. - Remove unnecessary checks. - Avoid magic numbers. - Add new comments. - Use temperature sensors count to make memory controller visible. - Use temperature limits of package for memory controller. v4: - Address review comments from Raag. - Group new temperature attributes with existing temperature attributes as per channel index in Xe hwmon documentation. - Use DIV_ROUND_UP to calculate dwords needed for temperature limits. - Minor aesthetic refinements. - Remove unused TEMP_MASK_MAILBOX. v5: - Use REG_FIELD_GET to get count from READ_THERMAL_CONFIG output. (Raag) - Change count print from decimal to hexadecimal. - Cosmetic changes. Signed-off-by: Karthik Poosa <karthik.poosa@intel.com> Reviewed-by: Raag Jadav <raag.jadav@intel.com> Link: https://patch.msgid.link/20260112203521.1014388-3-karthik.poosa@intel.com Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c79
-rw-r--r--drivers/gpu/drm/xe/xe_pcode_api.h2
2 files changed, 76 insertions, 5 deletions
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 7eb6a76fa217..51a2c23be99e 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -43,6 +43,7 @@ enum xe_hwmon_channel {
CHANNEL_CARD,
CHANNEL_PKG,
CHANNEL_VRAM,
+ CHANNEL_MCTRL,
CHANNEL_MAX,
};
@@ -100,6 +101,9 @@ enum sensor_attr_power {
*/
#define PL_WRITE_MBX_TIMEOUT_MS (1)
+/* Index of memory controller in READ_THERMAL_DATA output */
+#define TEMP_INDEX_MCTRL 2
+
/**
* struct xe_hwmon_energy_info - to accumulate energy
*/
@@ -130,6 +134,10 @@ struct xe_hwmon_thermal_info {
/** @data: temperature limits in dwords */
u32 data[DIV_ROUND_UP(TEMP_LIMIT_MAX, sizeof(u32))];
};
+ /** @count: number of temperature sensors available for the platform */
+ u8 count;
+ /** @value: signed value from each sensor */
+ s8 value[U8_MAX];
};
/**
@@ -703,6 +711,7 @@ static const struct hwmon_channel_info * const hwmon_info[] = {
HWMON_T_LABEL,
HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL |
HWMON_T_MAX,
+ HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL),
HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT |
HWMON_P_CAP,
@@ -717,16 +726,51 @@ static const struct hwmon_channel_info * const hwmon_info[] = {
static int xe_hwmon_pcode_read_thermal_info(struct xe_hwmon *hwmon)
{
struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
+ u32 config = 0; /* raw READ_THERMAL_CONFIG reply; the sensor count is extracted from it below */
int ret;
ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_THERMAL_INFO, READ_THERMAL_LIMITS, 0),
&hwmon->temp.data[0], &hwmon->temp.data[1]);
+ if (ret) /* limits read failed: skip the config query and report the error */
+ return ret;
+
drm_dbg(&hwmon->xe->drm, "thermal info read val 0x%x val1 0x%x\n",
hwmon->temp.data[0], hwmon->temp.data[1]);
+ ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_THERMAL_INFO, READ_THERMAL_CONFIG, 0),
+ &config, NULL);
+ if (ret) /* hwmon->temp.count is left unmodified when the config read fails */
+ return ret;
+
+ drm_dbg(&hwmon->xe->drm, "thermal config count 0x%x\n", config);
+ hwmon->temp.count = REG_FIELD_GET(TEMP_MASK, config); /* TEMP_MASK field of the reply, stored as the sensor count */
+
return ret;
}
+/**
+ * get_mc_temp - read the average memory controller temperature
+ * @hwmon: xe hwmon instance; hwmon->temp.count and hwmon->temp.value are used
+ * @val: filled with the average, scaled by MILLIDEGREE_PER_DEGREE
+ *
+ * Refreshes hwmon->temp.value from pcode (READ_THERMAL_DATA, one dword per
+ * group) and averages the entries starting at TEMP_INDEX_MCTRL up to, but not
+ * including, the last of the hwmon->temp.count reported entries.
+ *
+ * Return: 0 on success, -EOPNOTSUPP if the reported sensor count leaves no
+ * entry to average, or the error returned by xe_pcode_read().
+ */
+static int get_mc_temp(struct xe_hwmon *hwmon, long *val)
+{
+	struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
+	u32 *dword = (u32 *)hwmon->temp.value;
+	/* Number of entries that take part in the average. */
+	int nr_mctrl = hwmon->temp.count - TEMP_INDEX_MCTRL - 1;
+	s32 average = 0;
+	int ret, i;
+
+	/*
+	 * The channel is made visible for any non-zero count, so a small count
+	 * can reach this point. Without this check the division below would
+	 * divide by zero (count == TEMP_INDEX_MCTRL + 1) or by a negative
+	 * number (smaller counts), the latter silently reporting 0.
+	 */
+	if (nr_mctrl <= 0)
+		return -EOPNOTSUPP;
+
+	/*
+	 * NOTE(review): the number of dwords fetched is derived from
+	 * TEMP_LIMIT_MAX while the averaged range is derived from temp.count;
+	 * confirm the fetched dwords cover every averaged entry.
+	 */
+	for (i = 0; i < DIV_ROUND_UP(TEMP_LIMIT_MAX, sizeof(u32)); i++) {
+		ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_THERMAL_INFO, READ_THERMAL_DATA, i),
+				    (dword + i), NULL);
+		if (ret)
+			return ret;
+		drm_dbg(&hwmon->xe->drm, "thermal data for group %d val 0x%x\n", i, dword[i]);
+	}
+
+	for (i = 0; i < nr_mctrl; i++)
+		average += hwmon->temp.value[TEMP_INDEX_MCTRL + i];
+
+	average /= nr_mctrl;
+	*val = average * MILLIDEGREE_PER_DEGREE;
+	return 0;
+}
+
+
/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */
static int xe_hwmon_pcode_read_i1(const struct xe_hwmon *hwmon, u32 *uval)
{
@@ -831,6 +875,8 @@ xe_hwmon_temp_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
return hwmon->temp.limit[TEMP_LIMIT_PKG_SHUTDOWN] ? 0444 : 0;
case CHANNEL_VRAM:
return hwmon->temp.limit[TEMP_LIMIT_MEM_SHUTDOWN] ? 0444 : 0;
+ case CHANNEL_MCTRL:
+ return hwmon->temp.count ? 0444 : 0;
default:
return 0;
}
@@ -840,6 +886,8 @@ xe_hwmon_temp_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
return hwmon->temp.limit[TEMP_LIMIT_PKG_CRIT] ? 0444 : 0;
case CHANNEL_VRAM:
return hwmon->temp.limit[TEMP_LIMIT_MEM_CRIT] ? 0444 : 0;
+ case CHANNEL_MCTRL:
+ return hwmon->temp.count ? 0444 : 0;
default:
return 0;
}
@@ -852,7 +900,16 @@ xe_hwmon_temp_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
}
case hwmon_temp_input:
case hwmon_temp_label:
- return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_TEMP, channel)) ? 0444 : 0;
+ switch (channel) {
+ case CHANNEL_PKG:
+ case CHANNEL_VRAM:
+ return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_TEMP,
+ channel)) ? 0444 : 0;
+ case CHANNEL_MCTRL:
+ return hwmon->temp.count ? 0444 : 0;
+ default:
+ return 0;
+ }
default:
return 0;
}
@@ -866,14 +923,23 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
switch (attr) {
case hwmon_temp_input:
- reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_TEMP, channel));
+ switch (channel) {
+ case CHANNEL_PKG:
+ case CHANNEL_VRAM:
+ reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_TEMP, channel));
- /* HW register value is in degrees Celsius, convert to millidegrees. */
- *val = REG_FIELD_GET(TEMP_MASK, reg_val) * MILLIDEGREE_PER_DEGREE;
- return 0;
+ /* HW register value is in degrees Celsius, convert to millidegrees. */
+ *val = REG_FIELD_GET(TEMP_MASK, reg_val) * MILLIDEGREE_PER_DEGREE;
+ return 0;
+ case CHANNEL_MCTRL:
+ return get_mc_temp(hwmon, val);
+ default:
+ return -EOPNOTSUPP;
+ }
case hwmon_temp_emergency:
switch (channel) {
case CHANNEL_PKG:
+ case CHANNEL_MCTRL:
*val = hwmon->temp.limit[TEMP_LIMIT_PKG_SHUTDOWN] * MILLIDEGREE_PER_DEGREE;
return 0;
case CHANNEL_VRAM:
@@ -885,6 +951,7 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
case hwmon_temp_crit:
switch (channel) {
case CHANNEL_PKG:
+ case CHANNEL_MCTRL:
*val = hwmon->temp.limit[TEMP_LIMIT_PKG_CRIT] * MILLIDEGREE_PER_DEGREE;
return 0;
case CHANNEL_VRAM:
@@ -1262,6 +1329,8 @@ static int xe_hwmon_read_label(struct device *dev,
*str = "pkg";
else if (channel == CHANNEL_VRAM)
*str = "vram";
+ else if (channel == CHANNEL_MCTRL)
+ *str = "mctrl";
return 0;
case hwmon_power:
case hwmon_energy:
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index dc8f241e5b9e..ad713a3e34e5 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -52,6 +52,8 @@
#define PCODE_THERMAL_INFO 0x25
#define READ_THERMAL_LIMITS 0x0
+#define READ_THERMAL_CONFIG 0x1
+#define READ_THERMAL_DATA 0x2
#define PCODE_LATE_BINDING 0x5C
#define GET_CAPABILITY_STATUS 0x0