summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhenguoYao <yaozhenguo1@gmail.com>2025-08-12 16:25:10 +0800
committerAndrew Morton <akpm@linux-foundation.org>2025-09-13 17:32:47 -0700
commit95f091274f3db39493e8b5c44671b9f1e02c0c25 (patch)
tree036ba4a2618964b1ab91d6dffbc7adc6f03cdb5c
parent41f88ddfd453fe894678e1f6909b9fb9e08e8c3d (diff)
watchdog/softlockup: fix incorrect CPU utilization output during softlockup
Since we use 16-bit precision, the raw data will undergo integer division, which may sometimes result in data loss. This can lead to slightly inaccurate CPU utilization calculations. Under normal circumstances, this isn't an issue. However, when CPU utilization reaches 100%, the calculated result might exceed 100%. For example, with raw data like the following: sample_period 400000134 new_stat 83648414036 old_stat 83247417494 sample_period=400000134/2^24=23 new_stat=83648414036/2^24=4985 old_stat=83247417494/2^24=4961 util=105% Below log will output: CPU#3 Utilization every 0s during lockup: #1: 0% system, 0% softirq, 105% hardirq, 0% idle #2: 0% system, 0% softirq, 105% hardirq, 0% idle #3: 0% system, 0% softirq, 100% hardirq, 0% idle #4: 0% system, 0% softirq, 105% hardirq, 0% idle #5: 0% system, 0% softirq, 105% hardirq, 0% idle To avoid confusion, we enforce a 100% display cap when calculations exceed this threshold. We also round to the nearest multiple of 16.8 milliseconds to improve the accuracy. [yaozhenguo1@gmail.com: make get_16bit_precision() more accurate, fix comment layout] Link: https://lkml.kernel.org/r/20250818081438.40540-1-yaozhenguo@jd.com Link: https://lkml.kernel.org/r/20250812082510.32291-1-yaozhenguo@jd.com Signed-off-by: ZhenguoYao <yaozhenguo1@gmail.com> Cc: Bitao Hu <yaoma@linux.alibaba.com> Cc: Li Huafei <lihuafei1@huawei.com> Cc: Max Kellermann <max.kellermann@ionos.com> Cc: Thomas Gleinxer <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r--kernel/watchdog.c14
1 files changed, 13 insertions, 1 deletions
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 9c7134f7d2c4..5413aa85e8a4 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -425,7 +425,11 @@ static DEFINE_PER_CPU(u8, cpustat_tail);
*/
static u16 get_16bit_precision(u64 data_ns)
{
- return data_ns >> 24LL; /* 2^24ns ~= 16.8ms */
+ /*
+ * 2^24ns ~= 16.8ms
+ * Round to the nearest multiple of 16.8 milliseconds.
+ */
+ return (data_ns + (1 << 23)) >> 24LL;
}
static void update_cpustat(void)
@@ -444,6 +448,14 @@ static void update_cpustat(void)
old_stat = __this_cpu_read(cpustat_old[i]);
new_stat = get_16bit_precision(cpustat[tracked_stats[i]]);
util = DIV_ROUND_UP(100 * (new_stat - old_stat), sample_period_16);
+ /*
+ * Since we use 16-bit precision, the raw data will undergo
+ * integer division, which may sometimes result in data loss,
+ * and then result might exceed 100%. To avoid confusion,
+ * we enforce a 100% display cap when calculations exceed this threshold.
+ */
+ if (util > 100)
+ util = 100;
__this_cpu_write(cpustat_util[tail][i], util);
__this_cpu_write(cpustat_old[i], new_stat);
}