diff options
author | Ilan Aelion <iaelion@nvidia.com> | 2012-10-26 10:44:58 -0600 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2013-09-14 12:56:58 -0700 |
commit | 0f96f06c0e2e1116c79f74ba6e0ea08d709ed27f (patch) | |
tree | c145606eef0613405dc1763d00df27443b97791d /drivers | |
parent | 1983c479035d82600c0e37def951b195e7dc45a3 (diff) |
drivers: misc: publish fps, cpu load, thread count
Adapted the cpufreq_interactive load estimation and averaging of the
number of runnable threads into a misc device that writes the
current cpu id, load and nr_runnable_threads for each cpu to a sysfs
node. Intended to provide an accurate cpu load reading to user-space
scaling controllers. In addition, the momentary frame rate is written
to /d/fps.
Bug 1161410
Bug 1164121
Change-Id: I041e230463fa7d4a3c83e4a2ab2ce199f9a5d8ba
Signed-off-by: Ilan Aelion <iaelion@nvidia.com>
Reviewed-on: http://git-master/r/165089
Reviewed-by: Mrutyunjay Sawant <msawant@nvidia.com>
Tested-by: Mrutyunjay Sawant <msawant@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/misc/Kconfig | 7 | ||||
-rw-r--r-- | drivers/misc/Makefile | 1 | ||||
-rw-r--r-- | drivers/misc/cpuload.c | 484 | ||||
-rw-r--r-- | drivers/misc/tegra-throughput.c | 61 |
4 files changed, 544 insertions, 9 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 1959ddef4676..d97308940068 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -611,6 +611,13 @@ config BLUEDROID_PM Say Y here to compile support for bluedroid_pm support into the kernel or say M to compile it as module (bluedroid_pm). +config CPULOAD_MONITOR + bool "Publish cpu load measures in sysfs" + depends on CPU_FREQ + default y + ---help--- + Publish cpu load measured in sysfs, to be used by user space cpu + frequency controllers. source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index f79faff6414a..1a788ac973e3 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -70,3 +70,4 @@ obj-$(CONFIG_SND_SOC_TEGRA_CS42L73) += a2220.o obj-$(CONFIG_SND_SOC_TEGRA_RT5640) += tfa9887.o obj-$(CONFIG_FAN_THERM_EST) += therm_fan_est.o obj-$(CONFIG_BLUEDROID_PM) += bluedroid_pm.o +obj-$(CONFIG_CPULOAD_MONITOR) += cpuload.o diff --git a/drivers/misc/cpuload.c b/drivers/misc/cpuload.c new file mode 100644 index 000000000000..01ee41bab805 --- /dev/null +++ b/drivers/misc/cpuload.c @@ -0,0 +1,484 @@ +/* + * drivers/misc/cpuload.c + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/cpufreq.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/tick.h> +#include <linux/timer.h> +#include <linux/workqueue.h> +#include <linux/kthread.h> +#include <linux/mutex.h> + +#include <asm/cputime.h> + +static atomic_t active_count = ATOMIC_INIT(0); +static unsigned int enabled; + +static void cpuloadmon_enable(unsigned int state); + +struct cpuloadmon_cpuinfo { + /* cpu load */ + struct timer_list cpu_timer; + int timer_idlecancel; + u64 time_in_idle; + u64 time_in_iowait; + u64 idle_exit_time; + u64 timer_run_time; + int idling; + int monitor_enabled; + int cpu_load; + + /* runnable threads */ + u64 previous_integral; + unsigned int avg; + bool integral_sampled; + u64 prev_timestamp; +}; + +static DEFINE_PER_CPU(struct cpuloadmon_cpuinfo, cpuinfo); + +/* Consider IO as busy */ +static unsigned long io_is_busy; + +/* + * The sample rate of the timer used to increase frequency + */ +#define DEFAULT_TIMER_RATE 20000; +static unsigned long timer_rate; + +/* nr runnable threads */ +#define NR_FSHIFT_EXP 3 +#define NR_FSHIFT (1 << NR_FSHIFT_EXP) +#define EXP 1497 /* 20 msec window */ + +static inline cputime64_t get_cpu_iowait_time( + unsigned int cpu, cputime64_t *wall) +{ + u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + + if (iowait_time == -1ULL) + return 0; + + return iowait_time; +} + +static void cpuloadmon_timer(unsigned long data) +{ + unsigned int delta_idle; + unsigned int delta_iowait; + unsigned int delta_time; + u64 time_in_idle; + u64 time_in_iowait; + u64 idle_exit_time; + struct cpuloadmon_cpuinfo *pcpu = + &per_cpu(cpuinfo, data); + u64 now_idle; + u64 now_iowait; + u64 integral, old_integral, delta_integral, delta_time_nr, cur_time; + + smp_rmb(); + + if (!pcpu->monitor_enabled) + goto exit; + + /* + * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time, + * this lets idle exit know the current idle 
time sample has + * been processed, and idle exit can generate a new sample and + * re-arm the timer. This prevents a concurrent idle + * exit on that CPU from writing a new set of info at the same time + * the timer function runs (the timer function can't use that info + * until more time passes). + */ + time_in_idle = pcpu->time_in_idle; + time_in_iowait = pcpu->time_in_iowait; + idle_exit_time = pcpu->idle_exit_time; + now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time); + now_iowait = get_cpu_iowait_time(data, NULL); + smp_wmb(); + + /* If we raced with cancelling a timer, skip. */ + if (!idle_exit_time) + goto exit; + + delta_idle = (unsigned int)(now_idle - time_in_idle); + delta_iowait = (unsigned int)(now_iowait - time_in_iowait); + delta_time = (unsigned int)(pcpu->timer_run_time - idle_exit_time); + + /* + * If timer ran less than 1ms after short-term sample started, retry. + */ + if (delta_time < 1000) + goto rearm; + + if (!io_is_busy) + delta_idle += delta_iowait; + + if (delta_idle > delta_time) + pcpu->cpu_load = 0; + else + pcpu->cpu_load = 100 * (delta_time - delta_idle) / delta_time; + + /* get avg nr runnables */ + integral = nr_running_integral(data); + old_integral = pcpu->previous_integral; + pcpu->previous_integral = integral; + cur_time = ktime_to_ns(ktime_get()); + delta_time_nr = cur_time - pcpu->prev_timestamp; + pcpu->prev_timestamp = cur_time; + + if (!pcpu->integral_sampled) { + pcpu->integral_sampled = true; + /* First sample to initialize prev_integral, skip + * avg calculation + */ + } else { + if (integral < old_integral) { + /* Overflow */ + delta_integral = (ULLONG_MAX - old_integral) + integral; + } else { + delta_integral = integral - old_integral; + } + + /* Calculate average for the previous sample window */ + do_div(delta_integral, delta_time_nr); + pcpu->avg = delta_integral; + } + +rearm: + if (!timer_pending(&pcpu->cpu_timer)) { + if (pcpu->idling) + goto exit; + + pcpu->time_in_idle = get_cpu_idle_time_us( + 
data, &pcpu->idle_exit_time); + pcpu->time_in_iowait = get_cpu_iowait_time( + data, NULL); + + mod_timer(&pcpu->cpu_timer, + jiffies + usecs_to_jiffies(timer_rate)); + } + +exit: + return; +} + +static void cpuloadmon_idle_start(void) +{ + struct cpuloadmon_cpuinfo *pcpu = + &per_cpu(cpuinfo, smp_processor_id()); + int pending; + + if (!pcpu->monitor_enabled) + return; + + pcpu->idling = 1; + smp_wmb(); + pending = timer_pending(&pcpu->cpu_timer); + + if (pending && pcpu->timer_idlecancel) { + del_timer(&pcpu->cpu_timer); + /* + * Ensure last timer run time is after current idle + * sample start time, so next idle exit will always + * start a new idle sampling period. + */ + pcpu->idle_exit_time = 0; + pcpu->timer_idlecancel = 0; + } +} + +static void cpuloadmon_idle_end(void) +{ + struct cpuloadmon_cpuinfo *pcpu = + &per_cpu(cpuinfo, smp_processor_id()); + + if (!pcpu->monitor_enabled) + return; + + pcpu->idling = 0; + smp_wmb(); + + /* + * Arm the timer for 1-2 ticks later if not already, and if the timer + * function has already processed the previous load sampling + * interval. (If the timer is not pending but has not processed + * the previous interval, it is probably racing with us on another + * CPU. Let it compute load based on the previous sample and then + * re-arm the timer for another interval when it's done, rather + * than updating the interval start time to be "now", which doesn't + * give the timer function enough time to make a decision on this + * run.) 
+ */ + if (timer_pending(&pcpu->cpu_timer) == 0 && + pcpu->timer_run_time >= pcpu->idle_exit_time && + pcpu->monitor_enabled) { + pcpu->time_in_idle = + get_cpu_idle_time_us(smp_processor_id(), + &pcpu->idle_exit_time); + pcpu->time_in_iowait = + get_cpu_iowait_time(smp_processor_id(), + NULL); + pcpu->timer_idlecancel = 0; + mod_timer(&pcpu->cpu_timer, + jiffies + usecs_to_jiffies(timer_rate)); + } +} + +#define DECL_CPULOAD_ATTR(name) \ +static ssize_t show_##name(struct kobject *kobj, \ + struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%lu\n", name); \ +} \ +\ +static ssize_t store_##name(struct kobject *kobj,\ + struct attribute *attr, const char *buf, size_t count) \ +{ \ + int ret; \ + unsigned long val; \ +\ + ret = kstrtoul(buf, 0, &val); \ + if (ret < 0) \ + return ret; \ + name = val; \ + return count; \ +} \ +\ +static struct global_attr name##_attr = __ATTR(name, 0644, \ + show_##name, store_##name); + +static ssize_t show_cpus_online(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + unsigned int i, t; + const cpumask_t *cpus = cpu_online_mask; + + i = 0; + for_each_cpu_mask(t, *cpus) + i++; + + return sprintf(buf, "%u\n", i); +} + +static struct global_attr cpus_online_attr = __ATTR(cpus_online, 0444, + show_cpus_online, NULL); + +static ssize_t show_cpu_load(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + unsigned int t, len, total; + const cpumask_t *cpus = cpu_online_mask; + struct cpuloadmon_cpuinfo *pcpu; + + total = 0; + + for_each_cpu_mask(t, *cpus) { + pcpu = &per_cpu(cpuinfo, t); + len = sprintf(buf, "%u %u %u\n", + t, pcpu->cpu_load, pcpu->avg); + total += len; + buf = &buf[len]; + } + + return total; +} + +static struct global_attr cpu_load_attr = __ATTR(cpu_load, 0444, + show_cpu_load, NULL); + +static ssize_t show_enable(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", enabled); +} + +static ssize_t store_enable(struct kobject *kobj, + struct 
attribute *attr, const char *buf, size_t count) +{ + int ret; + unsigned long val; + unsigned int before = enabled; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + enabled = val; + if (before != enabled) + cpuloadmon_enable(enabled); + + return count; +} +static struct global_attr enable_attr = __ATTR(enable, 0644, + show_enable, store_enable); + +DECL_CPULOAD_ATTR(io_is_busy) +DECL_CPULOAD_ATTR(timer_rate) +#undef DECL_CPULOAD_ATTR + +static struct attribute *cpuload_attributes[] = { + &io_is_busy_attr.attr, + &timer_rate_attr.attr, + &cpus_online_attr.attr, + &cpu_load_attr.attr, + &enable_attr.attr, + NULL, +}; + +static struct attribute_group cpuload_attr_group = { + .attrs = cpuload_attributes, + .name = "cpuload", +}; + +static int cpuloadmon_idle_notifier(struct notifier_block *nb, + unsigned long val, + void *data) +{ + switch (val) { + case IDLE_START: + cpuloadmon_idle_start(); + break; + case IDLE_END: + cpuloadmon_idle_end(); + break; + } + + return 0; +} + +static struct notifier_block cpuloadmon_idle_nb = { + .notifier_call = cpuloadmon_idle_notifier, +}; + +static void cpuloadmon_enable(unsigned int state) +{ + unsigned int j; + struct cpuloadmon_cpuinfo *pcpu; + const cpumask_t *cpus = cpu_possible_mask; + + if (state) { + u64 last_update; + + for_each_cpu(j, cpus) { + pcpu = &per_cpu(cpuinfo, j); + pcpu->time_in_idle = + get_cpu_idle_time_us(j, &last_update); + pcpu->idle_exit_time = last_update; + pcpu->time_in_iowait = + get_cpu_iowait_time(j, NULL); + pcpu->timer_idlecancel = 1; + pcpu->monitor_enabled = 1; + smp_wmb(); + + if (!timer_pending(&pcpu->cpu_timer)) + mod_timer(&pcpu->cpu_timer, jiffies + 2); + } + } else { + for_each_cpu(j, cpus) { + pcpu = &per_cpu(cpuinfo, j); + pcpu->monitor_enabled = 0; + smp_wmb(); + del_timer_sync(&pcpu->cpu_timer); + + /* + * Reset idle exit time since we may cancel the timer + * before it can run after the last idle exit time, + * to avoid tripping the check in idle exit for a timer + * that 
is trying to run. + */ + pcpu->idle_exit_time = 0; + } + } + + enabled = state; +} + +static int cpuloadmon_start(void) +{ + int rc; + + cpuloadmon_enable(1); + + /* + * Do not register the idle hook and create sysfs + * entries if we have already done so. + */ + if (atomic_inc_return(&active_count) > 1) + return 0; + + rc = sysfs_create_group(cpufreq_global_kobject, + &cpuload_attr_group); + if (rc) + return rc; + + idle_notifier_register(&cpuloadmon_idle_nb); + + return 0; +} + +static int cpuloadmon_stop(void) +{ + cpuloadmon_enable(0); + + if (atomic_dec_return(&active_count) > 0) + return 0; + + idle_notifier_unregister(&cpuloadmon_idle_nb); + sysfs_remove_group(cpufreq_global_kobject, + &cpuload_attr_group); + + return 0; +} + +static int __init cpuload_monitor_init(void) +{ + unsigned int i; + struct cpuloadmon_cpuinfo *pcpu; + + timer_rate = DEFAULT_TIMER_RATE; + + /* Initalize per-cpu timers */ + for_each_possible_cpu(i) { + pcpu = &per_cpu(cpuinfo, i); + init_timer(&pcpu->cpu_timer); + pcpu->cpu_timer.function = cpuloadmon_timer; + pcpu->cpu_timer.data = i; + } + + cpuloadmon_start(); + + /* disable by default */ + cpuloadmon_enable(0); + + return 0; +} + +module_init(cpuload_monitor_init); + +static void __exit cpuload_monitor_exit(void) +{ + cpuloadmon_stop(); +} + +module_exit(cpuload_monitor_exit); + +MODULE_AUTHOR("Ilan Aelion <iaelion@nvidia.com>"); +MODULE_DESCRIPTION("'cpuload_monitor' - A cpu load monitor"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/tegra-throughput.c b/drivers/misc/tegra-throughput.c index 284ce9fca4d8..28bf1cb47f73 100644 --- a/drivers/misc/tegra-throughput.c +++ b/drivers/misc/tegra-throughput.c @@ -21,6 +21,7 @@ #include <linux/ktime.h> #include <linux/miscdevice.h> #include <linux/fs.h> +#include <linux/debugfs.h> #include <linux/init.h> #include <linux/spinlock.h> #include <linux/throughput_ioctl.h> @@ -36,6 +37,12 @@ static ktime_t last_flip; static unsigned int multiple_app_disable; static spinlock_t lock; 
+#define EMA_PERIOD 16 +#define EMA_SHIFT 4 + +static int frame_time_sum_init = 1; +static long frame_time_sum; /* used for fps EMA */ + static struct work_struct work; static int throughput_hint; @@ -45,7 +52,7 @@ static void set_throughput_hint(struct work_struct *work) nvhost_scale3d_set_throughput_hint(throughput_hint); } -static int throughput_flip_callback(void) +static void throughput_flip_callback(void) { long timediff; ktime_t now; @@ -55,8 +62,10 @@ static int throughput_flip_callback(void) return NOTIFY_DONE; now = ktime_get(); + if (last_flip.tv64 != 0) { timediff = (long) ktime_us_delta(now, last_flip); + if (timediff > (long) USHRT_MAX) last_frame_time = USHRT_MAX; else @@ -73,7 +82,16 @@ static int throughput_flip_callback(void) if (!work_pending(&work)) schedule_work(&work); + + if (frame_time_sum_init) { + frame_time_sum = last_frame_time * EMA_PERIOD; + frame_time_sum_init = 0; + } else { + int t = frame_time_sum * (EMA_PERIOD - 1); + frame_time_sum = (t >> EMA_SHIFT) + last_frame_time; + } } + last_flip = now; return NOTIFY_OK; @@ -109,8 +127,11 @@ static int throughput_open(struct inode *inode, struct file *file) } throughput_active_app_count++; - if (throughput_active_app_count > 1) + if (throughput_active_app_count > 1) { multiple_app_disable = 1; + frame_time_sum_init = 1; + frame_time_sum = 0; + } spin_unlock(&lock); @@ -129,6 +150,8 @@ static int throughput_release(struct inode *inode, struct file *file) reset_target_frame_time(); multiple_app_disable = 0; callback_initialized = 0; + frame_time_sum_init = 1; + frame_time_sum = 0; tegra_dc_unset_flip_callback(); } @@ -168,9 +191,7 @@ static int throughput_set_target_fps(unsigned long arg) } static long -throughput_ioctl(struct file *file, - unsigned int cmd, - unsigned long arg) +throughput_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int err = 0; @@ -203,10 +224,30 @@ static const struct file_operations throughput_user_fops = { #define TEGRA_THROUGHPUT_MINOR 1 static 
struct miscdevice throughput_miscdev = { - .minor = TEGRA_THROUGHPUT_MINOR, - .name = "tegra-throughput", - .fops = &throughput_user_fops, - .mode = 0666, + .minor = TEGRA_THROUGHPUT_MINOR, + .name = "tegra-throughput", + .fops = &throughput_user_fops, + .mode = 0666, +}; + +static int fps_show(struct seq_file *s, void *unused) +{ + int frame_time_avg = frame_time_sum >> EMA_SHIFT; + int fps = frame_time_avg > 0 ? 1000000 / frame_time_avg : 0; + seq_printf(s, "%d\n", fps); + return 0; +} + +static int fps_open(struct inode *inode, struct file *file) +{ + return single_open(file, fps_show, inode->i_private); +} + +static const struct file_operations fps_fops = { + .open = fps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; int __init throughput_init_miscdev(void) @@ -225,6 +266,8 @@ int __init throughput_init_miscdev(void) return ret; } + debugfs_create_file("fps", 0444, NULL, NULL, &fps_fops); + return 0; } |