ARM: tegra: Add Tegra Profiler

Add Tegra Profiler kernel misc driver Bug 1312406 Change-Id: Ie5715bd0029a4d06fb20fa4ad8724827411a6c3b Signed-off-by: Igor Nabirushkin <inabirushkin@nvidia.com> Reviewed-on: http://git-master/r/250840 GVS: Gerrit_Virtual_Submit Reviewed-by: Daniel Horowitz <dhorowitz@nvidia.com> Tested-by: Daniel Horowitz <dhorowitz@nvidia.com> Reviewed-by: Gabby Lee <galee@nvidia.com>
author: Igor Nabirushkin <inabirushkin@nvidia.com> 2013-07-18 21:42:07 +0400
committer: Gabby Lee <galee@nvidia.com> 2013-08-20 18:25:26 -0700
commit: 0a2223fea024a7a1861fd7ba5a7cef909e8f3dbc (patch)
tree: ba8f504b3e0dea9bc90008dc3f70cbfa871ace29 /drivers
parent: df842a2215742f2f0f0034761dec3de0a5efa048 (diff)
28 files changed, 4959 insertions, 0 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 9d43f29d3cbe..a8d12e246938 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -616,4 +616,5 @@ source "drivers/misc/carma/Kconfig"
 source "drivers/misc/altera-stapl/Kconfig"
 source "drivers/misc/tegra-baseband/Kconfig"
 source "drivers/misc/tegra-cec/Kconfig"
+source "drivers/misc/tegra-profiler/Kconfig"
 endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 536df7e36498..4403b1ec0ea1 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -69,3 +69,4 @@ obj-$(CONFIG_SND_SOC_TEGRA_RT5640)	+= tfa9887.o
 obj-$(CONFIG_FAN_THERM_EST)	+= therm_fan_est.o
 obj-$(CONFIG_BLUEDROID_PM)      += bluedroid_pm.o
 obj-$(CONFIG_CPULOAD_MONITOR)	+= cpuload.o
+obj-$(CONFIG_TEGRA_PROFILER)	+= tegra-profiler/
diff --git a/drivers/misc/tegra-profiler/Kconfig b/drivers/misc/tegra-profiler/Kconfig
new file mode 100644
index 000000000000..312b75e9b848
--- /dev/null
+++ b/drivers/misc/tegra-profiler/Kconfig
@@ -0,0 +1,6 @@
+config TEGRA_PROFILER
+	bool "Enable Tegra profiler"
+	depends on ARCH_TEGRA
+	select KPROBES
+	help
+	  This option enables Tegra profiler
diff --git a/drivers/misc/tegra-profiler/Makefile b/drivers/misc/tegra-profiler/Makefile
new file mode 100644
index 000000000000..7b3d8088aa2f
--- /dev/null
+++ b/drivers/misc/tegra-profiler/Makefile
@@ -0,0 +1,18 @@
+#
+# drivers/misc/tegra-profiler/Makefile
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+#
+
+obj-$(CONFIG_TEGRA_PROFILER) += tegra-profiler.o
+tegra-profiler-objs := main.o armv7_pmu.o pl310.o hrt.o comm.o mmap.o backtrace.o debug.o ma.o power_clk.o auth.o
+
diff --git a/drivers/misc/tegra-profiler/armv7_pmu.c b/drivers/misc/tegra-profiler/armv7_pmu.c
new file mode 100644
index 000000000000..04436f8c7e02
--- /dev/null
+++ b/drivers/misc/tegra-profiler/armv7_pmu.c
@@ -0,0 +1,487 @@
+/*
+ * drivers/misc/tegra-profiler/armv7_pmu.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <asm/cputype.h>
+#include <asm/pmu.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "armv7_pmu.h"
+#include "quadd.h"
+#include "debug.h"
+
+/* Driver-wide PMU state: detected CPU type, counter set and selected events. */
+static struct armv7_pmu_ctx pmu_ctx;
+
+/*
+ * Per-CPU cache of the last value stored for each used event slot;
+ * cleared in pmu_start() and refreshed on every pmu_read().
+ */
+DEFINE_PER_CPU(u32[QUADD_MAX_PMU_COUNTERS], pmu_prev_val);
+
+/*
+ * Cortex-A9: translation from QUADD event types to hardware event numbers.
+ * Both L1 data cache miss types use the same refill event, and all
+ * L2 events are marked as unsupported.
+ */
+static unsigned quadd_armv7_a9_events_map[QUADD_EVENT_TYPE_MAX] = {
+	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
+		QUADD_ARMV7_A9_HW_EVENT_INST_OUT_OF_RENAME_STAGE,
+	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
+		QUADD_ARMV7_HW_EVENT_PC_WRITE,
+	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
+		QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
+	[QUADD_EVENT_TYPE_BUS_CYCLES] =
+		QUADD_ARMV7_HW_EVENT_CLOCK_CYCLES,
+
+	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
+		QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
+	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
+		QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
+	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
+		QUADD_ARMV7_HW_EVENT_IFETCH_MISS,
+
+	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
+		QUADD_ARMV7_UNSUPPORTED_EVENT,
+	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
+		QUADD_ARMV7_UNSUPPORTED_EVENT,
+	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
+		QUADD_ARMV7_UNSUPPORTED_EVENT,
+};
+
+/*
+ * Cortex-A15: translation from QUADD event types to hardware event numbers.
+ * L1/L2 data cache read and write refills have separate events here;
+ * only L2 instruction cache misses are marked as unsupported.
+ */
+static unsigned quadd_armv7_a15_events_map[QUADD_EVENT_TYPE_MAX] = {
+	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
+				QUADD_ARMV7_HW_EVENT_INSTR_EXECUTED,
+	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
+				QUADD_ARMV7_A15_HW_EVENT_SPEC_PC_WRITE,
+	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
+				QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
+	[QUADD_EVENT_TYPE_BUS_CYCLES] = QUADD_ARMV7_HW_EVENT_BUS_CYCLES,
+
+	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
+				QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_REFILL,
+	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
+				QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_REFILL,
+	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
+				QUADD_ARMV7_HW_EVENT_IFETCH_MISS,
+
+	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
+				QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_REFILL,
+	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
+				QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_REFILL,
+	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
+				QUADD_ARMV7_UNSUPPORTED_EVENT,
+};
+
+/* Return the current contents of the PMNC control register. */
+static u32 armv7_pmu_pmnc_read(void)
+{
+	u32 pmnc;
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(pmnc));
+
+	return pmnc;
+}
+
+/* Write @val to the PMNC control register; only the writable bits are kept. */
+static void armv7_pmu_pmnc_write(u32 val)
+{
+	u32 pmnc = val & QUADD_ARMV7_PMNC_MASK;
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(pmnc));
+}
+
+/*
+ * Enable a single counter.
+ * @index: event counter number, or QUADD_ARMV7_CYCLE_COUNTER for the
+ *         cycle counter.
+ */
+static void armv7_pmu_pmnc_enable_counter(int index)
+{
+	u32 enable_bit = (index == QUADD_ARMV7_CYCLE_COUNTER) ?
+			 QUADD_ARMV7_CCNT : 1 << index;
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (enable_bit));
+}
+
+/*
+ * Select the event counter that the following event count / event type
+ * register accesses refer to.
+ * @idx: counter number; only the bits in QUADD_ARMV7_SELECT_MASK are used.
+ */
+static void armv7_pmu_select_counter(unsigned int idx)
+{
+	u32 val;
+
+	val = idx & QUADD_ARMV7_SELECT_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+	/*
+	 * Make sure the selection has taken effect before the caller
+	 * touches the indirectly addressed counter/event registers.
+	 */
+	isb();
+}
+
+/*
+ * Post-process a raw counter value for the given QUADD event.
+ *
+ * Cortex A8/A9: the L1 data cache counters do not differentiate between
+ * read and write accesses/misses, so when both miss events were requested
+ * (pmu_ctx.l1_cache_rw) the value is currently divided by two.
+ *
+ * Returns the adjusted value, or @value unchanged in every other case.
+ */
+static u32 armv7_pmu_adjust_value(u32 value, int event_id)
+{
+	if (!pmu_ctx.l1_cache_rw)
+		return value;
+
+	if (pmu_ctx.arch != QUADD_ARM_CPU_TYPE_CORTEX_A8 &&
+	    pmu_ctx.arch != QUADD_ARM_CPU_TYPE_CORTEX_A9)
+		return value;
+
+	switch (event_id) {
+	case QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES:
+	case QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES:
+		return value / 2;
+	default:
+		return value;
+	}
+}
+
+/*
+ * Read the current value of a counter.
+ * @idx: event counter number, or QUADD_ARMV7_CYCLE_COUNTER.
+ */
+static u32 armv7_pmu_read_counter(int idx)
+{
+	u32 count = 0;
+
+	if (idx != QUADD_ARMV7_CYCLE_COUNTER) {
+		/* select the counter, then read the event count register */
+		armv7_pmu_select_counter(idx);
+		asm volatile ("MRC p15, 0, %0, c9, c13, 2" : "=r"(count));
+		return count;
+	}
+
+	/* Cycle count register (PMCCNTR) reading */
+	asm volatile ("MRC p15, 0, %0, c9, c13, 0" : "=r"(count));
+	return count;
+}
+
+/*
+ * Load @value into a counter (currently unused).
+ * @idx: event counter number, or QUADD_ARMV7_CYCLE_COUNTER.
+ */
+static __attribute__((unused)) void armv7_pmu_write_counter(int idx, u32 value)
+{
+	if (idx != QUADD_ARMV7_CYCLE_COUNTER) {
+		/* select the counter, then write the event count register */
+		armv7_pmu_select_counter(idx);
+		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
+		return;
+	}
+
+	/* Cycle count register (PMCCNTR) writing */
+	asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
+}
+
+/*
+ * Program the event type of the currently selected counter.
+ * Only the bits in QUADD_ARMV7_EVTSEL_MASK of @event are written.
+ */
+static void armv7_pmu_event_select(u32 event)
+{
+	u32 evtsel = event & QUADD_ARMV7_EVTSEL_MASK;
+
+	asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (evtsel));
+}
+
+/*
+ * Enable the interrupt of one counter (currently unused).
+ * @idx: event counter number, or QUADD_ARMV7_CYCLE_COUNTER.
+ */
+static __attribute__((unused)) void armv7_pmnc_enable_interrupt(int idx)
+{
+	u32 irq_bit = (idx == QUADD_ARMV7_CYCLE_COUNTER) ?
+		      QUADD_ARMV7_CCNT : 1 << idx;
+
+	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (irq_bit));
+}
+
+/*
+ * Disable the interrupt of one counter (currently unused).
+ * @idx: event counter number, or QUADD_ARMV7_CYCLE_COUNTER.
+ */
+static __attribute__((unused)) void armv7_pmnc_disable_interrupt(int idx)
+{
+	u32 irq_bit = (idx == QUADD_ARMV7_CYCLE_COUNTER) ?
+		      QUADD_ARMV7_CCNT : 1 << idx;
+
+	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (irq_bit));
+}
+
+/* Disable the interrupts of the cycle counter and of every known counter. */
+static void armv7_pmnc_disable_all_interrupts(void)
+{
+	u32 all_bits = pmu_ctx.counters_mask | QUADD_ARMV7_CCNT;
+
+	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (all_bits));
+}
+
+/* Clear the overflow flags of the cycle counter and of every known counter. */
+static void armv7_pmnc_reset_overflow_flags(void)
+{
+	u32 all_bits = pmu_ctx.counters_mask | QUADD_ARMV7_CCNT;
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (all_bits));
+}
+
+/*
+ * Bind hardware event @event to event counter @idx.
+ * The counter has to be selected first because the event type is written
+ * to whichever counter is currently selected.
+ */
+static inline void select_event(unsigned int idx, unsigned int event)
+{
+	/* counter selection */
+	armv7_pmu_select_counter(idx);
+	armv7_pmu_event_select(event);
+}
+
+/* Clear the global enable bit in PMNC if it is currently set. */
+static inline void disable_all_counters(void)
+{
+	u32 pmnc = armv7_pmu_pmnc_read();
+
+	/* nothing to do when counting is already stopped */
+	if (!(pmnc & QUADD_ARMV7_PMNC_E))
+		return;
+
+	armv7_pmu_pmnc_write(pmnc & ~QUADD_ARMV7_PMNC_E);
+}
+
+/* Set the global enable bit and the export bit (PMNC_X) in PMNC. */
+static inline void enable_all_counters(void)
+{
+	u32 pmnc = armv7_pmu_pmnc_read();
+
+	armv7_pmu_pmnc_write(pmnc | QUADD_ARMV7_PMNC_E | QUADD_ARMV7_PMNC_X);
+}
+
+/*
+ * Bring the PMU into a known state before counters are programmed:
+ * clear the overflow flags and disable all counter interrupts.
+ */
+static inline void quadd_init_pmu(void)
+{
+	armv7_pmnc_reset_overflow_flags();
+	armv7_pmnc_disable_all_interrupts();
+}
+
+/* Reset all event counters (PMNC_P) and the cycle counter (PMNC_C) to zero. */
+static inline void reset_all_counters(void)
+{
+	u32 pmnc = armv7_pmu_pmnc_read();
+
+	armv7_pmu_pmnc_write(pmnc | QUADD_ARMV7_PMNC_P | QUADD_ARMV7_PMNC_C);
+}
+
+/*
+ * Reserve the CPU PMU for the profiler.
+ * Returns 0 on success or the error code from reserve_pmu().
+ */
+static int pmu_enable(void)
+{
+	int err = reserve_pmu(ARM_PMU_DEVICE_CPU);
+
+	if (!err) {
+		pr_info("pmu was reserved\n");
+		return 0;
+	}
+
+	pr_err("error: pmu was not reserved\n");
+	return err;
+}
+
+/* Give the CPU PMU reservation taken in pmu_enable() back. */
+static void pmu_disable(void)
+{
+	release_pmu(ARM_PMU_DEVICE_CPU);
+	pr_info("pmu was released\n");
+}
+
+/*
+ * Program and start the selected counters on the current CPU.
+ *
+ * Counting is stopped first, then every selected event is bound to its
+ * counter and the counter is enabled; finally all counters are zeroed
+ * and counting is switched on globally.
+ */
+static void pmu_start(void)
+{
+	int slot;
+	u32 *last_vals = __get_cpu_var(pmu_prev_val);
+
+	disable_all_counters();
+	quadd_init_pmu();
+
+	for (slot = 0; slot < pmu_ctx.nr_used_counters; slot++) {
+		struct quadd_pmu_event_info *info = &pmu_ctx.pmu_events[slot];
+		u32 hw_event = info->hw_value;
+		int counter = info->counter_idx;
+
+		last_vals[slot] = 0;
+
+		/* the cycle counter has a fixed event, nothing to select */
+		if (counter != QUADD_ARMV7_CYCLE_COUNTER)
+			select_event(counter, hw_event);
+
+		armv7_pmu_pmnc_enable_counter(counter);
+	}
+
+	reset_all_counters();
+	enable_all_counters();
+
+	qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
+}
+
+/* Zero all counters and stop counting on the current CPU. */
+static void pmu_stop(void)
+{
+	reset_all_counters();
+	disable_all_counters();
+
+	qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
+}
+
+/*
+ * Sample every selected counter on the current CPU.
+ * @events: output array, one entry is filled per selected event.
+ *
+ * Each entry receives the (adjusted) current value together with the value
+ * stored by the previous call on this CPU.
+ * Returns the number of entries written (0 if no events were selected).
+ */
+static int __maybe_unused pmu_read(struct event_data *events)
+{
+	int slot;
+	u32 *last_vals = __get_cpu_var(pmu_prev_val);
+
+	if (pmu_ctx.nr_used_counters == 0) {
+		pr_warn_once("error: counters were not initialized\n");
+		return 0;
+	}
+
+	for (slot = 0; slot < pmu_ctx.nr_used_counters; slot++) {
+		struct quadd_pmu_event_info *info = &pmu_ctx.pmu_events[slot];
+		struct event_data *out = &events[slot];
+		u32 count;
+
+		count = armv7_pmu_read_counter(info->counter_idx);
+		count = armv7_pmu_adjust_value(count, info->quadd_event_id);
+
+		out->event_source = QUADD_EVENT_SOURCE_PMU;
+		out->event_id = info->quadd_event_id;
+
+		out->val = count;
+		out->prev_val = last_vals[slot];
+
+		/* remember the sample for the next call on this CPU */
+		last_vals[slot] = count;
+
+		qm_debug_read_counter(out->event_id, out->prev_val,
+				      out->val);
+	}
+
+	return pmu_ctx.nr_used_counters;
+}
+
+/*
+ * Emulated replacement for pmu_read(), used when
+ * QUADD_USE_EMULATE_COUNTERS is defined.
+ * @events: output array, one entry is filled per selected event.
+ *
+ * Produces a synthetic value that cycles through 100..200 in steps of 5
+ * and fills the same fields as pmu_read(), so consumers see a valid
+ * source, event id and previous value.
+ * Returns the number of entries written.
+ */
+static int __maybe_unused pmu_read_emulate(struct event_data *events)
+{
+	int i;
+	static u32 val = 100;
+	u32 *prevp = __get_cpu_var(pmu_prev_val);
+
+	for (i = 0; i < pmu_ctx.nr_used_counters; i++) {
+		if (val > 200)
+			val = 100;
+
+		events[i].event_source = QUADD_EVENT_SOURCE_PMU;
+		/* report the real event id, not the previous counter value */
+		events[i].event_id = pmu_ctx.pmu_events[i].quadd_event_id;
+
+		events[i].val = val;
+		events[i].prev_val = prevp[i];
+
+		prevp[i] = val;
+
+		val += 5;
+	}
+
+	return pmu_ctx.nr_used_counters;
+}
+
+/*
+ * Select the events to be counted by the PMU.
+ * @events: array of QUADD_EVENT_TYPE_* ids (may be NULL to clear)
+ * @size:   number of entries in @events
+ *
+ * QUADD_EVENT_TYPE_CPU_CYCLES is served by the dedicated cycle counter;
+ * every other event takes the next free generic counter and is translated
+ * through the map of the detected CPU.
+ *
+ * On any error the previous selection stays cleared (nr_used_counters == 0).
+ *
+ * Returns 0 on success, -EINVAL for a bad size, an out-of-range or an
+ * unsupported event, -ENOSPC when there are more events than counters and
+ * -ENODEV when no event map is available for this CPU.
+ */
+static int set_events(int *events, int size)
+{
+	int i, nr_l1_r = 0, nr_l1_w = 0, curr_idx = 0;
+
+	pmu_ctx.l1_cache_rw = 0;
+	pmu_ctx.nr_used_counters = 0;
+
+	if (!events || size == 0)
+		return 0;
+
+	if (size < 0) {
+		pr_err("Invalid number of events: %d\n", size);
+		return -EINVAL;
+	}
+
+	if (size > QUADD_MAX_PMU_COUNTERS) {
+		pr_err("Too many events (> %d)\n", QUADD_MAX_PMU_COUNTERS);
+		return -ENOSPC;
+	}
+
+	if (!pmu_ctx.current_map) {
+		pr_err("Invalid current_map\n");
+		return -ENODEV;
+	}
+
+	for (i = 0; i < size; i++) {
+		struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];
+
+		/* the id is used as an index into current_map[] below */
+		if (events[i] < 0 || events[i] >= QUADD_EVENT_TYPE_MAX) {
+			pr_err("Error event: %d\n", events[i]);
+			return -EINVAL;
+		}
+
+		if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
+			/* dedicated counter, does not use a generic one */
+			pmu_event->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT;
+			pmu_event->counter_idx = QUADD_ARMV7_CYCLE_COUNTER;
+		} else {
+			int hw_value = pmu_ctx.current_map[events[i]];
+
+			/*
+			 * The sentinel would be truncated to event 0x00 by
+			 * the event select mask, so refuse it here.
+			 */
+			if (hw_value == QUADD_ARMV7_UNSUPPORTED_EVENT) {
+				pr_err("Unsupported event: %s\n",
+				       quadd_get_event_str(events[i]));
+				return -EINVAL;
+			}
+
+			if (curr_idx >= pmu_ctx.nr_counters) {
+				pr_err("Too many events (> %d)\n",
+				       pmu_ctx.nr_counters);
+				return -ENOSPC;
+			}
+
+			pmu_event->hw_value = hw_value;
+			pmu_event->counter_idx = curr_idx++;
+		}
+		pmu_event->quadd_event_id = events[i];
+
+		if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
+			nr_l1_r++;
+		else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
+			nr_l1_w++;
+
+		pr_info("Event has been added: id/pmu value: %s/%#x\n",
+			quadd_get_event_str(events[i]),
+			pmu_event->hw_value);
+	}
+	pmu_ctx.nr_used_counters = size;
+
+	/* both L1 miss types requested: see armv7_pmu_adjust_value() */
+	if (nr_l1_r > 0 && nr_l1_w > 0)
+		pmu_ctx.l1_cache_rw = 1;
+
+	return 0;
+}
+
+/*
+ * Report the QUADD event types the detected CPU can count.
+ * @events: output array with room for QUADD_EVENT_TYPE_MAX entries
+ *
+ * Returns the number of ids stored in @events; 0 if @events is NULL or
+ * no event map is available for this CPU.
+ */
+static int get_supported_events(int *events)
+{
+	int i, nr_events = 0;
+
+	/* same guard as set_events(): the map is NULL for unknown CPUs */
+	if (!events || !pmu_ctx.current_map)
+		return 0;
+
+	for (i = 0; i < QUADD_EVENT_TYPE_MAX; i++) {
+		if (pmu_ctx.current_map[i] != QUADD_ARMV7_UNSUPPORTED_EVENT)
+			events[nr_events++] = i;
+	}
+	return nr_events;
+}
+
+/*
+ * Event source operations handed out by quadd_armv7_pmu_init().
+ * The .read hook is replaced by the emulated variant when
+ * QUADD_USE_EMULATE_COUNTERS is defined.
+ */
+static struct quadd_event_source_interface pmu_armv7_int = {
+	.enable			= pmu_enable,
+	.disable		= pmu_disable,
+
+	.start			= pmu_start,
+	.stop			= pmu_stop,
+
+#ifndef QUADD_USE_EMULATE_COUNTERS
+	.read			= pmu_read,
+#else
+	.read			= pmu_read_emulate,
+#endif
+	.set_events		= set_events,
+	.get_supported_events	= get_supported_events,
+};
+
+/*
+ * Detect the CPU type and set up the PMU context accordingly.
+ *
+ * The context is first reset to the "Unknown" defaults, so it is
+ * consistent (and the log line below is meaningful) for CPUs from another
+ * implementer as well as for unrecognized ARM parts.
+ *
+ * Returns the PMU event source interface for Cortex-A9 and Cortex-A15,
+ * or NULL if the CPU is not supported.
+ */
+struct quadd_event_source_interface *quadd_armv7_pmu_init(void)
+{
+	struct quadd_event_source_interface *pmu = NULL;
+	unsigned long cpu_id, cpu_implementer, part_number;
+
+	cpu_id = read_cpuid_id();
+	cpu_implementer = (cpu_id >> 24) & 0xFF;
+	part_number = cpu_id & 0xFFF0;
+
+	pmu_ctx.arch = QUADD_ARM_CPU_TYPE_UNKNOWN;
+	strlcpy(pmu_ctx.arch_name, "Unknown", sizeof(pmu_ctx.arch_name));
+	pmu_ctx.nr_counters = 0;
+	pmu_ctx.counters_mask = 0;
+	pmu_ctx.current_map = NULL;
+
+	if (cpu_implementer == QUADD_ARM_CPU_IMPLEMENTER) {
+		switch (part_number) {
+		case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9:
+			pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A9;
+			strlcpy(pmu_ctx.arch_name, "Cortex A9",
+				sizeof(pmu_ctx.arch_name));
+			pmu_ctx.nr_counters = 6;
+			pmu_ctx.counters_mask =
+				QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9;
+			pmu_ctx.current_map = quadd_armv7_a9_events_map;
+			pmu = &pmu_armv7_int;
+			break;
+
+		case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15:
+			pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A15;
+			strlcpy(pmu_ctx.arch_name, "Cortex A15",
+				sizeof(pmu_ctx.arch_name));
+			pmu_ctx.nr_counters = 6;
+			pmu_ctx.counters_mask =
+				QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15;
+			pmu_ctx.current_map = quadd_armv7_a15_events_map;
+			pmu = &pmu_armv7_int;
+			break;
+
+		default:
+			/* unknown ARM part: keep the defaults set above */
+			break;
+		}
+	}
+
+	pr_info("arch: %s, number of counters: %d\n",
+		pmu_ctx.arch_name, pmu_ctx.nr_counters);
+	return pmu;
+}
diff --git a/drivers/misc/tegra-profiler/armv7_pmu.h b/drivers/misc/tegra-profiler/armv7_pmu.h
new file mode 100644
index 000000000000..827fe4292a33
--- /dev/null
+++ b/drivers/misc/tegra-profiler/armv7_pmu.h
@@ -0,0 +1,269 @@
+/*
+ * drivers/misc/tegra-profiler/armv7_pmu.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __ARMV7_PMU_H
+#define __ARMV7_PMU_H
+
+/* Implementer code, compared against bits [31:24] of the CPU ID value */
+#define QUADD_ARM_CPU_IMPLEMENTER 0x41
+
+/* CPU types, stored in armv7_pmu_ctx.arch */
+enum {
+	QUADD_ARM_CPU_TYPE_UNKNOWN,
+	QUADD_ARM_CPU_TYPE_CORTEX_A5,
+	QUADD_ARM_CPU_TYPE_CORTEX_A8,
+	QUADD_ARM_CPU_TYPE_CORTEX_A9,
+	QUADD_ARM_CPU_TYPE_CORTEX_A15,
+};
+
+/* Part numbers, compared against the CPU ID value masked with 0xFFF0 */
+#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A5	0xC050
+#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A8	0xC080
+#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9	0xC090
+#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15	0xC0F0
+
+
+/* Capacity of the selected-event table and of the per-CPU value cache */
+#define QUADD_MAX_PMU_COUNTERS	32
+
+/* One selected event and the counter it has been assigned to. */
+struct quadd_pmu_event_info {
+	int quadd_event_id;	/* QUADD_EVENT_TYPE_* id requested by the user */
+	int hw_value;		/* hardware event number for this CPU */
+	int counter_idx;	/* counter number or QUADD_ARMV7_CYCLE_COUNTER */
+};
+
+/* State of the ARMv7 PMU event source. */
+struct armv7_pmu_ctx {
+	int arch;		/* QUADD_ARM_CPU_TYPE_* of the detected CPU */
+	char arch_name[32];	/* printable CPU name */
+
+	int nr_counters;	/* number of generic event counters */
+	u32 counters_mask;	/* bit mask covering the event counters */
+
+	/* events chosen by the last set_events() call */
+	struct quadd_pmu_event_info pmu_events[QUADD_MAX_PMU_COUNTERS];
+	int nr_used_counters;	/* valid entries in pmu_events[] */
+
+	/* set when both L1 dcache read and write misses were requested */
+	int l1_cache_rw;
+	/*
+	 * Event map of the detected CPU, NULL if the CPU is unknown.
+	 * NOTE(review): the map tables are arrays of unsigned while this
+	 * is int * - consider making the types agree.
+	 */
+	int *current_map;
+};
+
+struct quadd_event_source_interface;
+
+/*
+ * Detect the CPU and return the PMU event source interface,
+ * or NULL if the CPU is not supported.
+ */
+extern struct quadd_event_source_interface *quadd_armv7_pmu_init(void);
+
+/*
+ * PMNC Register
+ */
+
+/* 0/1: disables/enables all counters, including CCNT */
+#define QUADD_ARMV7_PMNC_E		(1 << 0)
+/* 1: Resets all performance counters to zero. */
+#define QUADD_ARMV7_PMNC_P		(1 << 1)
+/* 1: Resets cycle counter, CCNT, to zero. */
+#define QUADD_ARMV7_PMNC_C		(1 << 2)
+/*
+ * 0: counts every processor clock cycle, reset value.
+ * 1: counts every 64th processor clock cycle.
+ */
+#define QUADD_ARMV7_PMNC_D		(1 << 3)
+/* 0/1: Export to ETM disabled/enabled */
+#define QUADD_ARMV7_PMNC_X		(1 << 4)
+/*
+ * 0/1: count is disabled/enabled in regions where
+ * non-invasive debug is prohibited
+ */
+#define QUADD_ARMV7_PMNC_DP		(1 << 5)
+/* Mask for writable bits */
+#define	QUADD_ARMV7_PMNC_MASK		0x3f
+
+
+/*
+ * Bit of the cycle counter in the enable/interrupt/overflow registers.
+ * Unsigned on purpose: shifting a signed 1 into bit 31 overflows int.
+ */
+#define QUADD_ARMV7_CCNT		(1U << 31)	/* Cycle counter */
+
+/* Pseudo counter index that denotes the cycle counter */
+#define QUADD_ARMV7_CYCLE_COUNTER	(-1)
+
+/*
+ * CNTENS: counters enable reg
+ */
+#define QUADD_ARMV7_CNTENS_P(i)		(1U << (i))
+#define QUADD_ARMV7_CNTENS_C		QUADD_ARMV7_CCNT
+
+/*
+ * CNTENC: counters disable reg
+ */
+#define QUADD_ARMV7_CNTENC_P(i)	(1U << (i))
+#define QUADD_ARMV7_CNTENC_C	QUADD_ARMV7_CCNT
+
+/*
+ * Performance Counter Selection Register mask
+ */
+#define QUADD_ARMV7_SELECT_MASK	0x1f
+
+/*
+ * EVTSEL Register mask
+ */
+#define QUADD_ARMV7_EVTSEL_MASK	0xff
+
+/* Per-CPU-type bit masks of the event counters (one bit per counter) */
+#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A5		0x03
+#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A8		0x0f
+#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9		0x3f
+#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15		0x3f
+
+/* Hardware event numbers that are not specific to one Cortex-A core. */
+enum quadd_armv7_common_events {
+	QUADD_ARMV7_HW_EVENT_PMNC_SW_INCR		= 0x00,
+	QUADD_ARMV7_HW_EVENT_IFETCH_MISS		= 0x01,
+	QUADD_ARMV7_HW_EVENT_ITLB_MISS			= 0x02,
+	QUADD_ARMV7_HW_EVENT_DCACHE_REFILL		= 0x03,
+	QUADD_ARMV7_HW_EVENT_DCACHE_ACCESS		= 0x04,
+	QUADD_ARMV7_HW_EVENT_DTLB_REFILL		= 0x05,
+	QUADD_ARMV7_HW_EVENT_DREAD			= 0x06,
+	QUADD_ARMV7_HW_EVENT_DWRITE			= 0x07,
+	QUADD_ARMV7_HW_EVENT_INSTR_EXECUTED		= 0x08,
+	QUADD_ARMV7_HW_EVENT_EXC_TAKEN			= 0x09,
+	QUADD_ARMV7_HW_EVENT_EXC_EXECUTED		= 0x0A,
+	QUADD_ARMV7_HW_EVENT_CID_WRITE			= 0x0B,
+	QUADD_ARMV7_HW_EVENT_PC_WRITE			= 0x0C,
+	QUADD_ARMV7_HW_EVENT_PC_IMM_BRANCH		= 0x0D,
+	QUADD_ARMV7_HW_EVENT_PC_PROC_RETURN		= 0x0E,
+	QUADD_ARMV7_HW_EVENT_UNALIGNED_ACCESS		= 0x0F,
+
+	QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED		= 0x10,
+	QUADD_ARMV7_HW_EVENT_CLOCK_CYCLES		= 0x11,
+	QUADD_ARMV7_HW_EVENT_PC_BRANCH_PRED		= 0x12,
+	QUADD_ARMV7_HW_EVENT_MEM_ACCESS			= 0x13,
+	QUADD_ARMV7_HW_EVENT_L1_ICACHE_ACCESS		= 0x14,
+	QUADD_ARMV7_HW_EVENT_L1_DCACHE_WB		= 0x15,
+	QUADD_ARMV7_HW_EVENT_L2_DCACHE_ACCESS		= 0x16,
+	QUADD_ARMV7_HW_EVENT_L2_DCACHE_REFILL		= 0x17,
+	QUADD_ARMV7_HW_EVENT_L2_DCACHE_WB		= 0x18,
+	QUADD_ARMV7_HW_EVENT_BUS_ACCESS			= 0x19,
+	QUADD_ARMV7_HW_EVENT_MEMORY_ERROR		= 0x1A,
+	QUADD_ARMV7_HW_EVENT_INSTR_SPEC			= 0x1B,
+	QUADD_ARMV7_HW_EVENT_TTBR_WRITE			= 0x1C,
+	QUADD_ARMV7_HW_EVENT_BUS_CYCLES			= 0x1D,
+};
+
+/* Cortex-A8 specific hardware event numbers (no A8 event map uses them yet). */
+enum quadd_armv7_a8_specific_events {
+	QUADD_ARMV7_A8_HW_EVENT_WRITE_BUFFER_FULL			= 0x40,
+	QUADD_ARMV7_A8_HW_EVENT_L2_STORE_MERGED				= 0x41,
+	QUADD_ARMV7_A8_HW_EVENT_L2_STORE_BUFF				= 0x42,
+	QUADD_ARMV7_A8_HW_EVENT_L2_ACCESS				= 0x43,
+	QUADD_ARMV7_A8_HW_EVENT_L2_CACH_MISS				= 0x44,
+	QUADD_ARMV7_A8_HW_EVENT_AXI_READ_CYCLES				= 0x45,
+	QUADD_ARMV7_A8_HW_EVENT_AXI_WRITE_CYCLES			= 0x46,
+	QUADD_ARMV7_A8_HW_EVENT_MEMORY_REPLAY				= 0x47,
+	QUADD_ARMV7_A8_HW_EVENT_UNALIGNED_ACCESS_REPLAY			= 0x48,
+	QUADD_ARMV7_A8_HW_EVENT_L1_DATA_MISS				= 0x49,
+	QUADD_ARMV7_A8_HW_EVENT_L1_INST_MISS				= 0x4A,
+	QUADD_ARMV7_A8_HW_EVENT_L1_DATA_COLORING			= 0x4B,
+	QUADD_ARMV7_A8_HW_EVENT_L1_NEON_DATA				= 0x4C,
+	QUADD_ARMV7_A8_HW_EVENT_L1_NEON_CACH_DATA			= 0x4D,
+	QUADD_ARMV7_A8_HW_EVENT_L2_NEON					= 0x4E,
+	QUADD_ARMV7_A8_HW_EVENT_L2_NEON_HIT				= 0x4F,
+	QUADD_ARMV7_A8_HW_EVENT_L1_INST					= 0x50,
+	QUADD_ARMV7_A8_HW_EVENT_PC_RETURN_MIS_PRED			= 0x51,
+	QUADD_ARMV7_A8_HW_EVENT_PC_BRANCH_FAILED			= 0x52,
+	QUADD_ARMV7_A8_HW_EVENT_PC_BRANCH_TAKEN				= 0x53,
+	QUADD_ARMV7_A8_HW_EVENT_PC_BRANCH_EXECUTED			= 0x54,
+	QUADD_ARMV7_A8_HW_EVENT_OP_EXECUTED				= 0x55,
+	QUADD_ARMV7_A8_HW_EVENT_CYCLES_INST_STALL			= 0x56,
+	QUADD_ARMV7_A8_HW_EVENT_CYCLES_INST				= 0x57,
+	QUADD_ARMV7_A8_HW_EVENT_CYCLES_NEON_DATA_STALL			= 0x58,
+	QUADD_ARMV7_A8_HW_EVENT_CYCLES_NEON_INST_STALL			= 0x59,
+	QUADD_ARMV7_A8_HW_EVENT_NEON_CYCLES				= 0x5A,
+
+	QUADD_ARMV7_A8_HW_EVENT_PMU0_EVENTS				= 0x70,
+	QUADD_ARMV7_A8_HW_EVENT_PMU1_EVENTS				= 0x71,
+	QUADD_ARMV7_A8_HW_EVENT_PMU_EVENTS				= 0x72,
+};
+
+/* Cortex-A9 specific hardware event numbers. */
+enum quadd_armv7_a9_specific_events {
+	QUADD_ARMV7_A9_HW_EVENT_JAVA_HW_BYTECODE_EXEC			= 0x40,
+	QUADD_ARMV7_A9_HW_EVENT_JAVA_SW_BYTECODE_EXEC			= 0x41,
+	QUADD_ARMV7_A9_HW_EVENT_JAZELLE_BRANCH_EXEC			= 0x42,
+
+	QUADD_ARMV7_A9_HW_EVENT_COHERENT_LINE_MISS			= 0x50,
+	QUADD_ARMV7_A9_HW_EVENT_COHERENT_LINE_HIT			= 0x51,
+
+	QUADD_ARMV7_A9_HW_EVENT_ICACHE_DEP_STALL_CYCLES			= 0x60,
+	QUADD_ARMV7_A9_HW_EVENT_DCACHE_DEP_STALL_CYCLES			= 0x61,
+	QUADD_ARMV7_A9_HW_EVENT_TLB_MISS_DEP_STALL_CYCLES		= 0x62,
+	QUADD_ARMV7_A9_HW_EVENT_STREX_EXECUTED_PASSED			= 0x63,
+	QUADD_ARMV7_A9_HW_EVENT_STREX_EXECUTED_FAILED			= 0x64,
+	QUADD_ARMV7_A9_HW_EVENT_DATA_EVICTION				= 0x65,
+	QUADD_ARMV7_A9_HW_EVENT_ISSUE_STAGE_NO_INST			= 0x66,
+	QUADD_ARMV7_A9_HW_EVENT_ISSUE_STAGE_EMPTY			= 0x67,
+	/* used as the "instructions" event in the A9 event map */
+	QUADD_ARMV7_A9_HW_EVENT_INST_OUT_OF_RENAME_STAGE		= 0x68,
+
+	QUADD_ARMV7_A9_HW_EVENT_PREDICTABLE_FUNCT_RETURNS		= 0x6E,
+
+	QUADD_ARMV7_A9_HW_EVENT_MAIN_UNIT_EXECUTED_INST			= 0x70,
+	QUADD_ARMV7_A9_HW_EVENT_SECOND_UNIT_EXECUTED_INST		= 0x71,
+	QUADD_ARMV7_A9_HW_EVENT_LD_ST_UNIT_EXECUTED_INST		= 0x72,
+	QUADD_ARMV7_A9_HW_EVENT_FP_EXECUTED_INST			= 0x73,
+	QUADD_ARMV7_A9_HW_EVENT_NEON_EXECUTED_INST			= 0x74,
+
+	QUADD_ARMV7_A9_HW_EVENT_PLD_FULL_DEP_STALL_CYCLES		= 0x80,
+	QUADD_ARMV7_A9_HW_EVENT_DATA_WR_DEP_STALL_CYCLES		= 0x81,
+	QUADD_ARMV7_A9_HW_EVENT_ITLB_MISS_DEP_STALL_CYCLES		= 0x82,
+	QUADD_ARMV7_A9_HW_EVENT_DTLB_MISS_DEP_STALL_CYCLES		= 0x83,
+	QUADD_ARMV7_A9_HW_EVENT_MICRO_ITLB_MISS_DEP_STALL_CYCLES	= 0x84,
+	QUADD_ARMV7_A9_HW_EVENT_MICRO_DTLB_MISS_DEP_STALL_CYCLES	= 0x85,
+	QUADD_ARMV7_A9_HW_EVENT_DMB_DEP_STALL_CYCLES			= 0x86,
+
+	QUADD_ARMV7_A9_HW_EVENT_INTGR_CLK_ENABLED_CYCLES		= 0x8A,
+	QUADD_ARMV7_A9_HW_EVENT_DATA_ENGINE_CLK_EN_CYCLES		= 0x8B,
+
+	QUADD_ARMV7_A9_HW_EVENT_ISB_INST				= 0x90,
+	QUADD_ARMV7_A9_HW_EVENT_DSB_INST				= 0x91,
+	QUADD_ARMV7_A9_HW_EVENT_DMB_INST				= 0x92,
+	QUADD_ARMV7_A9_HW_EVENT_EXT_INTERRUPTS				= 0x93,
+
+	QUADD_ARMV7_A9_HW_EVENT_PLE_CACHE_LINE_RQST_COMPLETED		= 0xA0,
+	QUADD_ARMV7_A9_HW_EVENT_PLE_CACHE_LINE_RQST_SKIPPED		= 0xA1,
+	QUADD_ARMV7_A9_HW_EVENT_PLE_FIFO_FLUSH				= 0xA2,
+	QUADD_ARMV7_A9_HW_EVENT_PLE_RQST_COMPLETED			= 0xA3,
+	QUADD_ARMV7_A9_HW_EVENT_PLE_FIFO_OVERFLOW			= 0xA4,
+	QUADD_ARMV7_A9_HW_EVENT_PLE_RQST_PROG				= 0xA5
+};
+
+/* Cortex-A5 specific hardware event numbers (no A5 event map uses them yet). */
+enum quadd_armv7_a5_specific_events {
+	QUADD_ARMV7_A5_HW_EVENT_IRQ_TAKEN				= 0x86,
+	QUADD_ARMV7_A5_HW_EVENT_FIQ_TAKEN				= 0x87,
+
+	QUADD_ARMV7_A5_HW_EVENT_EXT_MEM_RQST				= 0xc0,
+	QUADD_ARMV7_A5_HW_EVENT_NC_EXT_MEM_RQST				= 0xc1,
+	QUADD_ARMV7_A5_HW_EVENT_PREFETCH_LINEFILL			= 0xc2,
+	QUADD_ARMV7_A5_HW_EVENT_PREFETCH_LINEFILL_DROP			= 0xc3,
+	QUADD_ARMV7_A5_HW_EVENT_ENTER_READ_ALLOC			= 0xc4,
+	QUADD_ARMV7_A5_HW_EVENT_READ_ALLOC				= 0xc5,
+
+	QUADD_ARMV7_A5_HW_EVENT_STALL_SB_FULL				= 0xc9,
+};
+
+/* Cortex-A15 specific hardware event numbers. */
+enum quadd_armv7_a15_specific_events {
+	QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_ACCESS	= 0x40,
+	QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_ACCESS	= 0x41,
+	QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_REFILL	= 0x42,
+	QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_REFILL	= 0x43,
+
+	QUADD_ARMV7_A15_HW_EVENT_L1_DTLB_READ_REFILL	= 0x4C,
+	QUADD_ARMV7_A15_HW_EVENT_L1_DTLB_WRITE_REFILL	= 0x4D,
+
+	QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_ACCESS	= 0x50,
+	QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_ACCESS	= 0x51,
+	QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_REFILL	= 0x52,
+	QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_REFILL	= 0x53,
+
+	QUADD_ARMV7_A15_HW_EVENT_SPEC_PC_WRITE		= 0x76,
+};
+
+/*
+ * Sentinel values stored in the event maps / event info instead of a real
+ * hardware event number. Both lie outside QUADD_ARMV7_EVTSEL_MASK.
+ */
+#define QUADD_ARMV7_UNSUPPORTED_EVENT	0xff00	/* event not available */
+#define QUADD_ARMV7_CPU_CYCLE_EVENT	0xffff	/* served by the cycle counter */
+
+/* NOTE(review): no definition is visible in this part of the patch. */
+void quadd_pmu_test(void);
+
+#endif	/* __ARMV7_PMU_H */
diff --git a/drivers/misc/tegra-profiler/auth.c b/drivers/misc/tegra-profiler/auth.c
new file mode 100644
index 000000000000..654f401b672a
--- /dev/null
+++ b/drivers/misc/tegra-profiler/auth.c
@@ -0,0 +1,336 @@
+/*
+ * drivers/misc/tegra-profiler/auth.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+
+#include "auth.h"
+#include "quadd.h"
+#include "debug.h"
+
+/* First word of every message sent to / received from the auth service */
+#define QUADD_SECURITY_MAGIC_REQUEST	0x11112222
+#define QUADD_SECURITY_MAGIC_RESPONSE	0x33334444
+
+/* How long quadd_auth_check_debug_flag() waits for a response */
+#define QUADD_TIMEOUT	1000	/* msec */
+
+/* Values of the "response value" word of a response message */
+enum {
+	QUADD_SECURITY_RESPONSE_ERROR			= 0,
+	QUADD_SECURITY_RESPONSE_DEBUG_FLAG_ON		= 1,
+	QUADD_SECURITY_RESPONSE_DEBUG_FLAG_OFF		= 2,
+	QUADD_SECURITY_RESPONSE_PACKAGE_NOT_FOUND	= 3,
+};
+
+/* Values of the "command" word of request and response messages */
+enum {
+	QUADD_SECURITY_REQUEST_CMD_TEST_DEBUG_FLAG	= 1,
+	QUADD_SECURITY_RESPONSE_CMD_TEST_DEBUG_FLAG	= 2,
+};
+
+/* Payload of the request in flight and of the answer to it. */
+struct quadd_auth_data {
+	/* package name placed in the request */
+	char package_name[QUADD_MAX_PACKAGE_NAME];
+
+	uid_t debug_app_uid;	/* uid taken from the response */
+	int response_value;	/* QUADD_SECURITY_RESPONSE_* from the response */
+};
+
+/* State of the auth misc device (single instance). */
+static struct quadd_auth_context {
+	struct miscdevice misc_dev;
+
+	/* non-zero while the device file is open; allows a single opener */
+	atomic_t opened;
+
+	/* auth_read() sleeps here until a request is posted */
+	wait_queue_head_t request_wait;
+	/* quadd_auth_check_debug_flag() sleeps here until auth_write() ran */
+	wait_queue_head_t response_wait;
+
+	int request_ready;
+	int response_ready;
+	struct quadd_auth_data data;
+	struct mutex lock;	/* taken around accesses to data */
+
+	/* id of the last request sent; responses must carry the same id */
+	unsigned int msg_id;
+
+	struct quadd_ctx *quadd_ctx;
+} auth_ctx;
+
+/* Mark the response as available and wake up the waiting requester. */
+static inline void response_ready(void)
+{
+	auth_ctx.response_ready = 1;
+	wake_up_interruptible(&auth_ctx.response_wait);
+}
+
+/* Mark a request as pending and wake up a reader blocked in auth_read(). */
+static inline void request_ready(void)
+{
+	auth_ctx.request_ready = 1;
+	wake_up_interruptible(&auth_ctx.request_wait);
+}
+
+/*
+ * open() handler: only one opener is allowed at a time.
+ * Resets the request/response flags and the exchanged data.
+ * Returns 0, or -EBUSY if the device is already open.
+ */
+static int auth_open(struct inode *inode, struct file *file)
+{
+	if (atomic_cmpxchg(&auth_ctx.opened, 0, 1) != 0) {
+		pr_err("Error: auth file is already opened\n");
+		return -EBUSY;
+	}
+	pr_info("auth is opened\n");
+
+	/* start from a clean state: nothing pending in either direction */
+	auth_ctx.request_ready = 0;
+	auth_ctx.response_ready = 0;
+
+	mutex_lock(&auth_ctx.lock);
+	auth_ctx.data.package_name[0] = '\0';
+	auth_ctx.data.debug_app_uid = 0;
+	auth_ctx.data.response_value = 0;
+	mutex_unlock(&auth_ctx.lock);
+
+	return 0;
+}
+
+/* release() handler: allow the device to be opened again. */
+static int auth_release(struct inode *inode, struct file *file)
+{
+	pr_info("auth is released\n");
+	atomic_set(&auth_ctx.opened, 0);
+	return 0;
+}
+
+/*
+ * read() handler: hand the pending request over to the auth service.
+ *
+ * Blocks until a request has been posted, then copies one message to
+ * @user_buf: four u32 words (magic, message id, command, name length)
+ * followed by the package name without a terminating NUL.
+ * A request is delivered once; the next read blocks until a new request
+ * is posted.
+ *
+ * Returns the message length, -ERESTARTSYS if interrupted by a signal,
+ * -EINVAL if @length is too small for the message (the request stays
+ * pending) or -EFAULT if the copy to user space fails.
+ */
+static ssize_t
+auth_read(struct file *filp,
+	    char __user *user_buf,
+	    size_t length,
+	    loff_t *offset)
+{
+	char buf[QUADD_MAX_PACKAGE_NAME + 4 * sizeof(u32)];
+	u32 hdr[4];
+	size_t name_len;
+	int msg_length, err;
+	struct quadd_auth_data *data = &auth_ctx.data;
+
+	err = wait_event_interruptible(auth_ctx.request_wait,
+				       auth_ctx.request_ready);
+	if (err)
+		return err;
+
+	mutex_lock(&auth_ctx.lock);
+
+	/* bounded: the name is not guaranteed to be NUL-terminated */
+	name_len = strnlen(data->package_name, QUADD_MAX_PACKAGE_NAME);
+	msg_length = sizeof(hdr) + name_len;
+
+	if (length < (size_t)msg_length) {
+		mutex_unlock(&auth_ctx.lock);
+		pr_err("Error: user buffer is too small: %zu\n", length);
+		return -EINVAL;
+	}
+
+	/* the request is consumed by this read */
+	auth_ctx.request_ready = 0;
+
+	hdr[0] = QUADD_SECURITY_MAGIC_REQUEST;
+	hdr[1] = ++auth_ctx.msg_id;
+	hdr[2] = QUADD_SECURITY_REQUEST_CMD_TEST_DEBUG_FLAG;
+	hdr[3] = name_len;
+
+	memcpy(buf, hdr, sizeof(hdr));
+	memcpy(buf + sizeof(hdr), data->package_name, name_len);
+
+	mutex_unlock(&auth_ctx.lock);
+
+	/* copy_to_user() returns the number of bytes NOT copied */
+	err = copy_to_user(user_buf, buf, msg_length);
+	if (err != 0) {
+		pr_err("Error: copy to user: %d\n", err);
+		return -EFAULT;
+	}
+
+	pr_info("auth read, msg_length: %d\n", msg_length);
+	return msg_length;
+}
+
+/*
+ * write() handler: accept the response of the auth service.
+ *
+ * A response consists of five u32 words (magic, message id, command,
+ * response value, payload length) followed by the payload; for
+ * QUADD_SECURITY_RESPONSE_CMD_TEST_DEBUG_FLAG the payload is one u32 uid.
+ *
+ * The stored result is reset to QUADD_SECURITY_RESPONSE_ERROR first and
+ * the waiting requester is woken up on every path, so a malformed
+ * response is reported to it as an error instead of a timeout.
+ *
+ * Returns @count on success, -EINVAL for a short or malformed message,
+ * -E2BIG for an unexpected payload length or -EFAULT if user memory
+ * cannot be read.
+ */
+static ssize_t
+auth_write(struct file *file,
+	  const char __user *user_buf,
+	  size_t count,
+	  loff_t *ppos)
+{
+	ssize_t ret = count;
+	u32 hdr[5];
+	u32 magic, response_cmd, response_value, length, uid, msg_id;
+	unsigned int expected_id;
+	struct quadd_auth_data *data = &auth_ctx.data;
+
+	pr_info("auth write, count: %zu\n", count);
+
+	mutex_lock(&auth_ctx.lock);
+	data->response_value = QUADD_SECURITY_RESPONSE_ERROR;
+	data->debug_app_uid = 0;
+	expected_id = auth_ctx.msg_id;
+	mutex_unlock(&auth_ctx.lock);
+
+	if (count < sizeof(hdr)) {
+		pr_err("Error count: %zu\n", count);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* copy_from_user() returns the number of bytes NOT copied */
+	if (copy_from_user(hdr, user_buf, sizeof(hdr))) {
+		pr_err("Error: copy from user\n");
+		ret = -EFAULT;
+		goto out;
+	}
+
+	magic = hdr[0];
+	if (magic != QUADD_SECURITY_MAGIC_RESPONSE) {
+		pr_err("Error magic: %#x\n", magic);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	msg_id = hdr[1];
+	if (msg_id != expected_id) {
+		pr_err("Error message id: %u\n", msg_id);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	response_cmd = hdr[2];
+	response_value = hdr[3];
+	length = hdr[4];
+
+	switch (response_cmd) {
+	case QUADD_SECURITY_RESPONSE_CMD_TEST_DEBUG_FLAG:
+		if (length != sizeof(uid)) {
+			pr_err("Error: invalid data length: %u\n", length);
+			ret = -E2BIG;
+			goto out;
+		}
+
+		/* the uid follows the header: make sure it was written */
+		if (count < sizeof(hdr) + sizeof(uid)) {
+			pr_err("Error count: %zu\n", count);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (get_user(uid, (u32 __user *)user_buf + ARRAY_SIZE(hdr))) {
+			pr_err("Error: copy from user\n");
+			ret = -EFAULT;
+			goto out;
+		}
+
+		mutex_lock(&auth_ctx.lock);
+		data->response_value = response_value;
+		data->debug_app_uid = uid;
+		mutex_unlock(&auth_ctx.lock);
+
+		pr_info("uid: %u, response_value: %u\n",
+			uid, response_value);
+		break;
+
+	default:
+		pr_err("Error: invalid response command: %u\n",
+		       response_cmd);
+		ret = -EINVAL;
+		goto out;
+	}
+
+out:
+	/* always release the requester, also for a broken response */
+	response_ready();
+
+	return ret;
+}
+
+/* File operations of the auth misc device. */
+static const struct file_operations auth_fops = {
+	.read		= auth_read,
+	.write		= auth_write,
+	.open		= auth_open,
+	.release	= auth_release,
+};
+
+/*
+ * Ask the user-space auth service whether a package is debuggable.
+ * @package_name: NUL-terminated name, 1..QUADD_MAX_PACKAGE_NAME-1 chars
+ *
+ * Posts a request for the reader of the auth device and waits up to
+ * QUADD_TIMEOUT msec for the response written back by the service.
+ *
+ * Returns the uid reported by the service if the package is debuggable,
+ * 0 if it is not debuggable, or a negative error:
+ *   -EINVAL    bad package name
+ *   -EIO       the auth device is not opened by the service
+ *   -ETIMEDOUT no response within the timeout
+ *   -ESRCH     package not found
+ *   -EBADMSG   error/unknown response
+ * or the error from the interrupted wait (signal).
+ */
+int quadd_auth_check_debug_flag(const char *package_name)
+{
+	int uid, response_value;
+	long ret;
+	struct quadd_auth_data *data = &auth_ctx.data;
+	int pkg_name_length;
+
+	if (!package_name)
+		return -EINVAL;
+
+	/* leave room for the terminating NUL in data->package_name */
+	pkg_name_length = strlen(package_name);
+	if (pkg_name_length == 0 ||
+	    pkg_name_length >= QUADD_MAX_PACKAGE_NAME)
+		return -EINVAL;
+
+	if (atomic_read(&auth_ctx.opened) == 0)
+		return -EIO;
+
+	mutex_lock(&auth_ctx.lock);
+	data->debug_app_uid = 0;
+	data->response_value = 0;
+
+	strlcpy(data->package_name, package_name, QUADD_MAX_PACKAGE_NAME);
+
+	/* drop a stale response so that the wait below really waits */
+	auth_ctx.response_ready = 0;
+	mutex_unlock(&auth_ctx.lock);
+
+	request_ready();
+
+	ret = wait_event_interruptible_timeout(auth_ctx.response_wait,
+					auth_ctx.response_ready,
+					msecs_to_jiffies(QUADD_TIMEOUT));
+	if (!auth_ctx.response_ready) {
+		if (ret < 0)
+			return ret;	/* interrupted by a signal */
+
+		pr_err("Error: Tegra profiler service did not answer\n");
+		return -ETIMEDOUT;
+	}
+
+	mutex_lock(&auth_ctx.lock);
+	uid = data->debug_app_uid;
+	response_value = data->response_value;
+	mutex_unlock(&auth_ctx.lock);
+
+	switch (response_value) {
+	case QUADD_SECURITY_RESPONSE_DEBUG_FLAG_ON:
+		pr_info("package %s is debuggable, uid: %d\n",
+			package_name, uid);
+		return uid;
+
+	case QUADD_SECURITY_RESPONSE_DEBUG_FLAG_OFF:
+		pr_info("package %s is not debuggable\n",
+			package_name);
+		return 0;
+
+	case QUADD_SECURITY_RESPONSE_PACKAGE_NOT_FOUND:
+		pr_err("Error: package %s not found\n", package_name);
+		return -ESRCH;
+
+	case QUADD_SECURITY_RESPONSE_ERROR:
+	default:
+		pr_err("Error: invalid response\n");
+		return -EBADMSG;
+	}
+}
+
+/*
+ * Set up the auth context and register the auth misc device.
+ * @quadd_ctx: profiler context, stored for later use
+ *
+ * All state used by the file operations is initialized before the device
+ * is registered, because the device can be opened as soon as
+ * misc_register() returns.
+ *
+ * Returns 0 on success or the error from misc_register().
+ */
+int quadd_auth_init(struct quadd_ctx *quadd_ctx)
+{
+	int err;
+	struct miscdevice *misc_dev = &auth_ctx.misc_dev;
+
+	pr_info("auth: init\n");
+
+	init_waitqueue_head(&auth_ctx.request_wait);
+	init_waitqueue_head(&auth_ctx.response_wait);
+
+	auth_ctx.request_ready = 0;
+	auth_ctx.response_ready = 0;
+
+	atomic_set(&auth_ctx.opened, 0);
+	mutex_init(&auth_ctx.lock);
+	auth_ctx.msg_id = 0;
+
+	auth_ctx.quadd_ctx = quadd_ctx;
+
+	misc_dev->minor = MISC_DYNAMIC_MINOR;
+	misc_dev->name = QUADD_AUTH_DEVICE_NAME;
+	misc_dev->fops = &auth_fops;
+
+	err = misc_register(misc_dev);
+	if (err < 0) {
+		pr_err("Error: misc_register %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/* Unregister the auth misc device registered by quadd_auth_init(). */
+void quadd_auth_deinit(void)
+{
+	struct miscdevice *misc_dev = &auth_ctx.misc_dev;
+
+	pr_info("auth: deinit\n");
+	misc_deregister(misc_dev);
+}
diff --git a/drivers/misc/tegra-profiler/auth.h b/drivers/misc/tegra-profiler/auth.h
new file mode 100644
index 000000000000..aa810f2d5a63
--- /dev/null
+++ b/drivers/misc/tegra-profiler/auth.h
@@ -0,0 +1,27 @@
+/*
+ * drivers/misc/tegra-profiler/auth.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_AUTH_H__
+#define __QUADD_AUTH_H__
+
+struct quadd_ctx;
+
+int quadd_auth_check_debug_flag(const char *package_name);
+
+int quadd_auth_init(struct quadd_ctx *quadd_ctx);
+void quadd_auth_deinit(void);
+
+#endif	/* __QUADD_AUTH_H__ */
diff --git a/drivers/misc/tegra-profiler/backtrace.c b/drivers/misc/tegra-profiler/backtrace.c
new file mode 100644
index 000000000000..dd7c67cded5a
--- /dev/null
+++ b/drivers/misc/tegra-profiler/backtrace.c
@@ -0,0 +1,167 @@
+/*
+ * drivers/misc/tegra-profiler/backtrace.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <asm-generic/uaccess.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "backtrace.h"
+
+#define QUADD_USER_SPACE_MIN_ADDR	0x8000
+
+/*
+ * Append @ip to the callchain.  Once QUADD_MAX_STACK_DEPTH entries are
+ * stored, further addresses are silently dropped.
+ */
+static inline void
+quadd_callchain_store(struct quadd_callchain *callchain_data, u32 ip)
+{
+	if (callchain_data->nr >= QUADD_MAX_STACK_DEPTH)
+		return;
+
+	callchain_data->callchain[callchain_data->nr] = ip;
+	callchain_data->nr++;
+}
+
+/*
+ * check_vma_address - verify that one word at @addr lies inside @vma
+ * @addr: address of the word to validate
+ * @vma:  candidate mapping, may be NULL
+ *
+ * Returns 0 when [@addr, @addr + sizeof(unsigned long)) is fully contained
+ * in [vm_start, vm_end), -EINVAL otherwise (including a NULL @vma).
+ *
+ * The remaining room is computed as "vm_end - addr" instead of testing
+ * "addr + sizeof(unsigned long) <= vm_end": the addition wraps around for
+ * addresses close to ULONG_MAX, which made such addresses look valid.
+ */
+static int
+check_vma_address(unsigned long addr, struct vm_area_struct *vma)
+{
+	if (!vma)
+		return -EINVAL;
+
+	if (addr < vma->vm_start || addr >= vma->vm_end)
+		return -EINVAL;
+
+	if (vma->vm_end - addr < sizeof(unsigned long))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * user_backtrace - unwind one user-space frame
+ * @tail:           frame pointer of the frame to decode
+ * @callchain_data: callchain that receives the return address
+ * @stack_vma:      mapping every frame word must lie in
+ *
+ * Two layouts are recognised: if the word at @tail itself points into the
+ * stack it is taken as the saved fp with lr at @tail + 1 (clang); otherwise
+ * the word at @tail is lr and the saved fp is at @tail - 1 (gcc).
+ *
+ * Returns the previous frame pointer, or NULL when unwinding must stop
+ * (address outside the stack, failed read, implausible lr, or a frame
+ * pointer that does not move towards higher addresses).
+ */
+static unsigned long __user *
+user_backtrace(unsigned long __user *tail,
+	       struct quadd_callchain *callchain_data,
+	       struct vm_area_struct *stack_vma)
+{
+	unsigned long value, value_lr = 0, value_fp = 0;
+	unsigned long __user *fp_prev = NULL;
+
+	if (check_vma_address((unsigned long)tail, stack_vma))
+		return NULL;
+
+	if (__copy_from_user_inatomic(&value, tail, sizeof(unsigned long)))
+		return NULL;
+
+	if (!check_vma_address(value, stack_vma)) {
+		/* clang's frame */
+		value_fp = value;
+
+		if (check_vma_address((unsigned long)(tail + 1), stack_vma))
+			return NULL;
+
+		if (__copy_from_user_inatomic(&value_lr, tail + 1,
+					      sizeof(unsigned long)))
+			return NULL;
+	} else {
+		/* gcc's frame */
+
+		/* tail - 1 may fall below the stack mapping: validate it too */
+		if (check_vma_address((unsigned long)(tail - 1), stack_vma))
+			return NULL;
+
+		if (__copy_from_user_inatomic(&value_fp, tail - 1,
+					      sizeof(unsigned long)))
+			return NULL;
+
+		if (check_vma_address(value_fp, stack_vma))
+			return NULL;
+
+		value_lr = value;
+	}
+
+	fp_prev = (unsigned long __user *)value_fp;
+
+	if (value_lr < QUADD_USER_SPACE_MIN_ADDR)
+		return NULL;
+
+	quadd_callchain_store(callchain_data, value_lr);
+
+	/* Frames must progress upwards, otherwise we could loop forever. */
+	if (fp_prev <= tail)
+		return NULL;
+
+	return fp_prev;
+}
+
+/*
+ * quadd_get_user_callchain - collect the user-space call chain for @regs
+ * @regs:           registers of the interrupted context
+ * @callchain_data: output; nr is reset and then filled by the unwinder
+ *
+ * Nothing is collected for kernel-mode or Thumb-mode contexts, for tasks
+ * without an mm, or when fp is zero, unaligned, below sp or outside the
+ * mapping found for sp.
+ *
+ * Returns the number of entries stored in @callchain_data.
+ */
+unsigned int
+quadd_get_user_callchain(struct pt_regs *regs,
+			 struct quadd_callchain *callchain_data)
+{
+	unsigned long fp, sp, pc, first_word;
+	struct vm_area_struct *stack_vma, *code_vma;
+	unsigned long __user *tail;
+	struct mm_struct *mm = current->mm;
+
+	callchain_data->nr = 0;
+
+	if (!regs || !user_mode(regs) || !mm)
+		return 0;
+
+	if (thumb_mode(regs))
+		return 0;
+
+	fp = regs->ARM_fp;
+	sp = regs->ARM_sp;
+	pc = regs->ARM_pc;
+
+	if (fp == 0 || fp < sp || fp & 0x3)
+		return 0;
+
+	stack_vma = find_vma(mm, sp);
+	if (check_vma_address(fp, stack_vma))
+		return 0;
+
+	if (__copy_from_user_inatomic(&first_word,
+				      (unsigned long __user *)fp,
+				      sizeof(unsigned long)))
+		return 0;
+
+	/* Default starting point; replaced below for a short gcc frame. */
+	tail = (unsigned long __user *)fp;
+
+	if (first_word > fp && !check_vma_address(first_word, stack_vma)) {
+		int short_frame = 1;
+
+		if (!check_vma_address(fp + sizeof(unsigned long),
+				       stack_vma)) {
+			unsigned long next_word;
+
+			if (__copy_from_user_inatomic(
+					&next_word,
+					(unsigned long __user *)fp + 1,
+					sizeof(unsigned long)))
+				return 0;
+
+			/*
+			 * A word above fp that lies in the mapping found
+			 * for pc means this is not the short-frame case.
+			 */
+			code_vma = find_vma(mm, pc);
+			if (!check_vma_address(next_word, code_vma))
+				short_frame = 0;
+		}
+
+		if (short_frame) {
+			/* gcc: fp --> short frame tail (fp) */
+
+			if (regs->ARM_lr < QUADD_USER_SPACE_MIN_ADDR)
+				return 0;
+
+			quadd_callchain_store(callchain_data, regs->ARM_lr);
+			tail = (unsigned long __user *)first_word;
+		}
+	}
+
+	while (tail && !((unsigned long)tail & 0x3))
+		tail = user_backtrace(tail, callchain_data, stack_vma);
+
+	return callchain_data->nr;
+}
diff --git a/drivers/misc/tegra-profiler/backtrace.h b/drivers/misc/tegra-profiler/backtrace.h
new file mode 100644
index 000000000000..82b55db496f0
--- /dev/null
+++ b/drivers/misc/tegra-profiler/backtrace.h
@@ -0,0 +1,34 @@
+/*
+ * drivers/misc/tegra-profiler/backtrace.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_BACKTRACE_H
+#define __QUADD_BACKTRACE_H
+
+#include <linux/types.h>
+
+#define QUADD_MAX_STACK_DEPTH		64
+
+struct quadd_callchain {
+	int nr;	/* number of valid entries in callchain[] */
+	u32 callchain[QUADD_MAX_STACK_DEPTH];	/* collected addresses, in the order they were stored */
+};
+
+unsigned int
+quadd_get_user_callchain(struct pt_regs *regs,
+			 struct quadd_callchain *callchain_data);
+
+
+#endif  /* __QUADD_BACKTRACE_H */
diff --git a/drivers/misc/tegra-profiler/comm.c b/drivers/misc/tegra-profiler/comm.c
new file mode 100644
index 000000000000..36bfa671893e
--- /dev/null
+++ b/drivers/misc/tegra-profiler/comm.c
@@ -0,0 +1,650 @@
+/*
+ * drivers/misc/tegra-profiler/comm.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/sched.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "comm.h"
+#include "version.h"
+
+#define QUADD_SIZE_RB_BUFFER	(0x100000 * 8)	/* 8 MB */
+
+struct quadd_comm_ctx comm_ctx;
+
+/* Obtain @size bytes of ring-buffer storage from vmalloc(). */
+static inline void *rb_alloc(unsigned long size)
+{
+	void *mem = vmalloc(size);
+
+	return mem;
+}
+
+/* Give storage obtained from rb_alloc() back via vfree(). */
+static inline void rb_free(void *addr)
+{
+	vfree(addr);
+}
+
+/* Mark the ring buffer empty; storage and size are left untouched. */
+static void rb_reset(struct quadd_ring_buffer *rb)
+{
+	rb->fill_count = 0;
+	rb->pos_write = 0;
+	rb->pos_read = 0;
+}
+
+/*
+ * rb_init - initialise the lock and allocate @size bytes of storage
+ *
+ * Returns 0 on success and 1 when the allocation fails.
+ */
+static int rb_init(struct quadd_ring_buffer *rb, size_t size)
+{
+	char *storage;
+
+	spin_lock_init(&rb->lock);
+	rb->size = size;
+
+	storage = rb_alloc(rb->size);
+	rb->buf = storage;
+	if (!storage) {
+		pr_err("Ring buffer alloc error\n");
+		return 1;
+	}
+
+	pr_debug("data buffer size: %u\n", (unsigned int)rb->size);
+	rb_reset(rb);
+
+	return 0;
+}
+
+/*
+ * rb_deinit - release the ring-buffer storage
+ *
+ * The storage pointer is detached and the positions reset under the lock,
+ * but vfree() itself is called only after the lock is dropped: vfree() must
+ * not run with a spinlock held and interrupts disabled.
+ */
+static void rb_deinit(struct quadd_ring_buffer *rb)
+{
+	unsigned long flags;
+	char *storage;
+
+	spin_lock_irqsave(&rb->lock, flags);
+	storage = rb->buf;
+	if (storage) {
+		rb_reset(rb);
+		rb->buf = NULL;
+	}
+	spin_unlock_irqrestore(&rb->lock, flags);
+
+	/* vfree(NULL) is a no-op, so no extra check is needed. */
+	rb_free(storage);
+}
+
+/* Non-zero when every byte of the storage holds unread data. */
+static __attribute__((unused)) int rb_is_full(struct quadd_ring_buffer *rb)
+{
+	return rb->size == rb->fill_count;
+}
+
+/* Non-zero when the buffer holds no unread data. */
+static int rb_is_empty(struct quadd_ring_buffer *rb)
+{
+	return !rb->fill_count;
+}
+
+/* Number of bytes that can still be written. */
+static size_t
+rb_get_free_space(struct quadd_ring_buffer *rb)
+{
+	size_t used = rb->fill_count;
+
+	return rb->size - used;
+}
+
+/*
+ * rb_write - append @length bytes from @data to the ring buffer
+ *
+ * Returns @length on success and 0 when the data does not fit; nothing is
+ * written in that case.  The callers in this file take rb->lock first.
+ *
+ * The copy is split at the end of the storage by comparing @length with the
+ * room left behind pos_write.  Detecting the wrap through
+ * "new position < old position" misses the case length == size with a
+ * non-zero pos_write and would then write past the end of the storage.
+ */
+static size_t
+rb_write(struct quadd_ring_buffer *rb, char *data, size_t length)
+{
+	size_t tail_room, chunk1;
+
+	if (length > rb_get_free_space(rb))
+		return 0;
+
+	tail_room = rb->size - rb->pos_write;
+	chunk1 = length < tail_room ? length : tail_room;
+
+	memcpy(rb->buf + rb->pos_write, data, chunk1);
+	if (length > chunk1)
+		memcpy(rb->buf, data + chunk1, length - chunk1);
+
+	rb->pos_write = (rb->pos_write + length) % rb->size;
+	rb->fill_count += length;
+
+	return length;
+}
+
+/*
+ * rb_read_undo - push the last @length bytes that were read back
+ *
+ * Moves pos_read backwards by @length (wrapping at the start of the
+ * storage) and makes those bytes count as unread again.
+ *
+ * Returns @length, or 0 when there is not enough free space for the bytes
+ * to be restored.
+ */
+static size_t rb_read_undo(struct quadd_ring_buffer *rb, size_t length)
+{
+	if (rb_get_free_space(rb) < length)
+		return 0;
+
+	/* ">=": pos_read == length must land on 0, not on rb->size */
+	if (rb->pos_read >= length)
+		rb->pos_read -= length;
+	else
+		rb->pos_read += rb->size - length;
+
+	/* restore exactly what is being un-read, whatever its size */
+	rb->fill_count += length;
+	return length;
+}
+
+/*
+ * rb_read - consume @length bytes from the ring buffer into @data
+ *
+ * Returns @length on success and 0 when fewer than @length bytes are
+ * stored; nothing is consumed in that case.
+ *
+ * Positions are kept in size_t like the fields of the ring buffer, and the
+ * copy is split by comparing @length with the room behind pos_read so that
+ * a read of exactly rb->size bytes from a non-zero position wraps correctly.
+ */
+static size_t rb_read(struct quadd_ring_buffer *rb, char *data, size_t length)
+{
+	size_t tail_room, chunk1;
+
+	if (length > rb->fill_count)
+		return 0;
+
+	tail_room = rb->size - rb->pos_read;
+	chunk1 = length < tail_room ? length : tail_room;
+
+	memcpy(data, rb->buf + rb->pos_read, chunk1);
+	if (length > chunk1)
+		memcpy(data + chunk1, rb->buf, length - chunk1);
+
+	rb->pos_read = (rb->pos_read + length) % rb->size;
+	rb->fill_count -= length;
+
+	return length;
+}
+
+/* Size of the on-stack buffer used to move sample payload to userspace. */
+#define QUADD_RB_BOUNCE_SIZE	256
+
+/*
+ * write_sample - store one record plus optional payload in the ring buffer
+ * @sample:       record header
+ * @extra_data:   payload following the header, may be NULL
+ * @extra_length: payload size in bytes
+ *
+ * Header and payload are written under a single hold of rb->lock.  When
+ * they do not both fit the sample is dropped.
+ */
+static void
+write_sample(struct quadd_record_data *sample, void *extra_data,
+	     size_t extra_length)
+{
+	unsigned long flags;
+	struct quadd_ring_buffer *rb = &comm_ctx.rb;
+	size_t length_sample = sizeof(struct quadd_record_data) + extra_length;
+
+	spin_lock_irqsave(&rb->lock, flags);
+
+	if (length_sample > rb_get_free_space(rb)) {
+		pr_err_once("Error: Buffer overflowed, skip sample\n");
+		goto out;
+	}
+
+	if (!rb_write(rb, (char *)sample, sizeof(struct quadd_record_data)))
+		goto out;
+
+	if (extra_data && extra_length > 0 &&
+	    !rb_write(rb, extra_data, extra_length))
+		pr_err_once("Buffer overflowed, skip sample\n");
+
+out:
+	spin_unlock_irqrestore(&rb->lock, flags);
+}
+
+/*
+ * read_sample - move one complete record from the ring buffer to userspace
+ * @buffer:     destination in user memory
+ * @max_length: room available at @buffer
+ *
+ * Returns the number of bytes stored at @buffer, or 0 when no record was
+ * delivered (buffer empty, record does not fit, corrupt data, user copy
+ * failure).  A record that does not fit in @max_length is left in the ring
+ * buffer for the next call.
+ *
+ * copy_to_user() may fault and sleep, so it is never called with rb->lock
+ * held.  The header is validated under the lock, then the payload is moved
+ * in chunks through an on-stack bounce buffer: each chunk is pulled out
+ * under the lock and copied to userspace after the lock is released.
+ * When a user copy fails the rest of the payload is still consumed so the
+ * next call starts at a record boundary.
+ *
+ * NOTE(review): this relies on the caller (device_read) holding io_mutex so
+ * that there is a single consumer; it also assumes the buffer is not reset
+ * while a read is in flight - confirm against the users of comm_data.reset.
+ */
+static int read_sample(char __user *buffer, size_t max_length)
+{
+	unsigned long flags;
+	struct quadd_ring_buffer *rb = &comm_ctx.rb;
+	struct quadd_record_data record;
+	char bounce[QUADD_RB_BOUNCE_SIZE];
+	size_t length_extra = 0, done = 0, part;
+	int failed = 0;
+
+	spin_lock_irqsave(&rb->lock, flags);
+
+	if (rb_is_empty(rb))
+		goto out_unlock;
+
+	if (rb->fill_count < sizeof(struct quadd_record_data)) {
+		pr_err_once("Error: data\n");
+		goto out_unlock;
+	}
+
+	if (!rb_read(rb, (char *)&record, sizeof(struct quadd_record_data))) {
+		pr_err_once("Error: read sample\n");
+		goto out_unlock;
+	}
+
+	if (record.magic != QUADD_RECORD_MAGIC) {
+		pr_err_once("Bad magic: %#x\n", record.magic);
+		goto out_unlock;
+	}
+
+	switch (record.record_type) {
+	case QUADD_RECORD_TYPE_SAMPLE:
+		length_extra = record.sample.callchain_nr *
+					sizeof(record.sample.ip);
+		break;
+
+	case QUADD_RECORD_TYPE_MMAP:
+		if (record.mmap.filename_length > 0) {
+			length_extra = record.mmap.filename_length;
+		} else {
+			length_extra = 0;
+			pr_err_once("Error: filename\n");
+		}
+		break;
+
+	case QUADD_RECORD_TYPE_DEBUG:
+	case QUADD_RECORD_TYPE_HEADER:
+	case QUADD_RECORD_TYPE_MA:
+		length_extra = 0;
+		break;
+
+	case QUADD_RECORD_TYPE_POWER_RATE:
+		length_extra = record.power_rate.nr_cpus * sizeof(u32);
+		break;
+
+	case QUADD_RECORD_TYPE_ADDITIONAL_SAMPLE:
+		length_extra = record.additional_sample.extra_length;
+		break;
+
+	default:
+		pr_err_once("Error: Unknown sample: %u\n", record.record_type);
+		goto out_unlock;
+	}
+
+	/* Does not fit in the user buffer: put the header back for later. */
+	if (sizeof(struct quadd_record_data) + length_extra > max_length) {
+		if (!rb_read_undo(rb, sizeof(struct quadd_record_data)))
+			pr_err_once("Error: rb_read_undo\n");
+		goto out_unlock;
+	}
+
+	/*
+	 * The payload must already be stored.  This has to be compared with
+	 * the amount of unread data, not with the free space.
+	 */
+	if (length_extra > rb->fill_count) {
+		pr_err_once("Error: Incompleted sample\n");
+		goto out_unlock;
+	}
+
+	spin_unlock_irqrestore(&rb->lock, flags);
+
+	if (copy_to_user(buffer, &record, sizeof(struct quadd_record_data))) {
+		pr_err_once("Error: copy_to_user\n");
+		failed = 1;
+	}
+
+	while (done < length_extra) {
+		part = length_extra - done;
+		if (part > sizeof(bounce))
+			part = sizeof(bounce);
+
+		spin_lock_irqsave(&rb->lock, flags);
+		part = rb_read(rb, bounce, part);
+		spin_unlock_irqrestore(&rb->lock, flags);
+
+		if (!part) {
+			pr_err_once("Error: Incompleted sample\n");
+			return 0;
+		}
+
+		/* after a failed copy keep draining, but stop copying */
+		if (!failed &&
+		    copy_to_user(buffer + sizeof(struct quadd_record_data) +
+				 done, bounce, part)) {
+			pr_err_once("Error: copy_to_user\n");
+			failed = 1;
+		}
+
+		done += part;
+	}
+
+	if (failed)
+		return 0;
+
+	return sizeof(struct quadd_record_data) + length_extra;
+
+out_unlock:
+	spin_unlock_irqrestore(&rb->lock, flags);
+	return 0;
+}
+
+/* Queue a record for userspace; ignored while profiling is not active. */
+static void put_sample(struct quadd_record_data *data, char *extra_data,
+		       unsigned int extra_length)
+{
+	if (atomic_read(&comm_ctx.active))
+		write_sample(data, extra_data, extra_length);
+}
+
+/* Discard everything currently stored in the sample ring buffer. */
+static void comm_reset(void)
+{
+	unsigned long flags;
+	struct quadd_ring_buffer *rb = &comm_ctx.rb;
+
+	pr_debug("Comm reset\n");
+
+	spin_lock_irqsave(&rb->lock, flags);
+	rb_reset(rb);
+	spin_unlock_irqrestore(&rb->lock, flags);
+}
+
+/* Interface returned by quadd_comm_events_init(). */
+static struct quadd_comm_data_interface comm_data = {
+	.reset = comm_reset,
+	.put_sample = put_sample,
+};
+
+/*
+ * check_access_permission - may the calling process use the profiler?
+ *
+ * CAP_SYS_ADMIN callers are always allowed.  Otherwise a successful setup
+ * must have named a target process, and either the caller's fsuid equals
+ * the uid of that process or that uid equals the debuggable-app uid stored
+ * in comm_ctx.
+ *
+ * Returns 0 when access is granted and -EACCES otherwise.
+ *
+ * The task found by pid_task() is only guaranteed to exist inside the RCU
+ * read-side section, so its uid is sampled before rcu_read_unlock() and
+ * the task pointer is not used afterwards.
+ */
+static int check_access_permission(void)
+{
+	struct task_struct *task;
+	uid_t target_uid = 0;
+	int found = 0;
+
+	if (capable(CAP_SYS_ADMIN))
+		return 0;
+
+	if (!comm_ctx.params_ok || comm_ctx.process_pid == 0)
+		return -EACCES;
+
+	rcu_read_lock();
+	task = pid_task(find_vpid(comm_ctx.process_pid), PIDTYPE_PID);
+	if (task) {
+		target_uid = task_uid(task);
+		found = 1;
+	}
+	rcu_read_unlock();
+
+	if (!found)
+		return -EACCES;
+
+	if (current_fsuid() != target_uid &&
+	    target_uid != comm_ctx.debug_app_uid) {
+		pr_err("Permission denied, owner/task uids: %u/%u\n",
+			   current_fsuid(), target_uid);
+		return -EACCES;
+	}
+	return 0;
+}
+
+/* open(): count one more user of the device. */
+static int device_open(struct inode *inode, struct file *file)
+{
+	mutex_lock(&comm_ctx.io_mutex);
+	comm_ctx.nr_users += 1;
+	mutex_unlock(&comm_ctx.io_mutex);
+
+	return 0;
+}
+
+/*
+ * release(): drop one user; when the last one goes away while profiling is
+ * still active, stop it.
+ */
+static int device_release(struct inode *inode, struct file *file)
+{
+	int last_user;
+
+	mutex_lock(&comm_ctx.io_mutex);
+
+	comm_ctx.nr_users--;
+	last_user = (comm_ctx.nr_users == 0);
+
+	if (last_user && atomic_cmpxchg(&comm_ctx.active, 1, 0)) {
+		comm_ctx.control->stop();
+		pr_info("Stop profiling: daemon is closed\n");
+	}
+
+	mutex_unlock(&comm_ctx.io_mutex);
+
+	return 0;
+}
+
+/*
+ * device_read - read(): copy as many whole records as fit into @buffer
+ *
+ * Returns the number of bytes stored in @buffer (possibly 0), the error
+ * from check_access_permission(), or -1 when profiling is not active.
+ *
+ * The loop runs while at least one bare record header still fits, so the
+ * guard uses "<=": a record without payload may fill the buffer exactly.
+ */
+static ssize_t
+device_read(struct file *filp,
+	    char __user *buffer,
+	    size_t length,
+	    loff_t *offset)
+{
+	int err;
+	size_t was_read = 0, res;
+
+	err = check_access_permission();
+	if (err)
+		return err;
+
+	mutex_lock(&comm_ctx.io_mutex);
+
+	if (!atomic_read(&comm_ctx.active)) {
+		mutex_unlock(&comm_ctx.io_mutex);
+		/* historical return value, kept for existing userspace */
+		return -1;
+	}
+
+	while (was_read + sizeof(struct quadd_record_data) <= length) {
+		res = read_sample(buffer + was_read, length - was_read);
+		if (res == 0)
+			break;
+
+		was_read += res;
+
+		if (!atomic_read(&comm_ctx.active))
+			break;
+	}
+
+	mutex_unlock(&comm_ctx.io_mutex);
+	return was_read;
+}
+
+/*
+ * device_ioctl - control interface of the profiler device
+ *
+ * IOCTL_SETUP, IOCTL_GET_CAP, IOCTL_GET_STATE and IOCTL_GET_VERSION are
+ * open to every caller; any other request must pass
+ * check_access_permission() first.  All requests run under io_mutex.
+ *
+ * Returns 0 on success or a negative error code.
+ *
+ * Every structure that is copied to userspace is zeroed before it is
+ * filled, so that padding and unused tails of on-stack objects are never
+ * disclosed, and the version strings are copied with a size bound.
+ */
+static long
+device_ioctl(struct file *file,
+	     unsigned int ioctl_num,
+	     unsigned long ioctl_param)
+{
+	long ret = 0;
+	int err;
+	struct quadd_parameters user_params;
+	struct quadd_comm_cap cap;
+	struct quadd_module_state state;
+	struct quadd_module_version versions;
+	unsigned long flags;
+	struct quadd_ring_buffer *rb = &comm_ctx.rb;
+	void __user *argp = (void __user *)ioctl_param;
+
+	if (ioctl_num != IOCTL_SETUP &&
+	    ioctl_num != IOCTL_GET_CAP &&
+	    ioctl_num != IOCTL_GET_STATE &&
+	    ioctl_num != IOCTL_GET_VERSION) {
+		err = check_access_permission();
+		if (err)
+			return err;
+	}
+
+	mutex_lock(&comm_ctx.io_mutex);
+
+	switch (ioctl_num) {
+	case IOCTL_SETUP:
+		if (atomic_read(&comm_ctx.active)) {
+			pr_err("error: tegra profiler is active\n");
+			ret = -EBUSY;
+			break;
+		}
+
+		if (copy_from_user(&user_params, argp,
+				   sizeof(struct quadd_parameters))) {
+			pr_err("setup failed\n");
+			ret = -EFAULT;
+			break;
+		}
+
+		err = comm_ctx.control->set_parameters(&user_params,
+						       &comm_ctx.debug_app_uid);
+		if (err) {
+			pr_err("error: setup failed\n");
+			ret = err;
+			break;
+		}
+		comm_ctx.params_ok = 1;
+		comm_ctx.process_pid = user_params.pids[0];
+
+		pr_info("setup success: freq/mafreq: %u/%u, backtrace: %d, pid: %d\n",
+			user_params.freq,
+			user_params.ma_freq,
+			user_params.backtrace,
+			user_params.pids[0]);
+		break;
+
+	case IOCTL_GET_CAP:
+		memset(&cap, 0, sizeof(cap));
+		comm_ctx.control->get_capabilities(&cap);
+		if (copy_to_user(argp, &cap,
+				 sizeof(struct quadd_comm_cap))) {
+			pr_err("error: get_capabilities failed\n");
+			ret = -EFAULT;
+		}
+		break;
+
+	case IOCTL_GET_VERSION:
+		memset(&versions, 0, sizeof(versions));
+		strlcpy(versions.branch, QUADD_MODULE_BRANCH,
+			sizeof(versions.branch));
+		strlcpy(versions.version, QUADD_MODULE_VERSION,
+			sizeof(versions.version));
+
+		versions.samples_version = QUADD_SAMPLES_VERSION;
+		versions.io_version = QUADD_IO_VERSION;
+
+		if (copy_to_user(argp, &versions,
+				 sizeof(struct quadd_module_version))) {
+			pr_err("error: get version failed\n");
+			ret = -EFAULT;
+		}
+		break;
+
+	case IOCTL_GET_STATE:
+		memset(&state, 0, sizeof(state));
+		comm_ctx.control->get_state(&state);
+
+		state.buffer_size = QUADD_SIZE_RB_BUFFER;
+
+		spin_lock_irqsave(&rb->lock, flags);
+		state.buffer_fill_size =
+			QUADD_SIZE_RB_BUFFER - rb_get_free_space(rb);
+		spin_unlock_irqrestore(&rb->lock, flags);
+
+		if (copy_to_user(argp, &state,
+				 sizeof(struct quadd_module_state))) {
+			pr_err("error: get_state failed\n");
+			ret = -EFAULT;
+		}
+		break;
+
+	case IOCTL_START:
+		/* only the caller that flips active 0 -> 1 starts profiling */
+		if (atomic_cmpxchg(&comm_ctx.active, 0, 1))
+			break;
+
+		if (!comm_ctx.params_ok) {
+			pr_err("error: params failed\n");
+			atomic_set(&comm_ctx.active, 0);
+			ret = -EFAULT;
+			break;
+		}
+
+		if (comm_ctx.control->start()) {
+			pr_err("error: start failed\n");
+			atomic_set(&comm_ctx.active, 0);
+			ret = -EFAULT;
+			break;
+		}
+		pr_info("Start profiling success\n");
+		break;
+
+	case IOCTL_STOP:
+		if (atomic_cmpxchg(&comm_ctx.active, 1, 0)) {
+			comm_ctx.control->stop();
+			pr_info("Stop profiling success\n");
+		}
+		break;
+
+	default:
+		pr_err("error: ioctl %u is unsupported in this version of module\n",
+		       ioctl_num);
+		ret = -EFAULT;
+		break;
+	}
+
+	mutex_unlock(&comm_ctx.io_mutex);
+
+	return ret;
+}
+
+/* Deregister the misc device and free its descriptor. */
+static void unregister(void)
+{
+	struct miscdevice *dev = comm_ctx.misc_dev;
+
+	misc_deregister(dev);
+	kfree(dev);
+}
+
+/* Release the resources owned by comm_ctx (the sample ring buffer). */
+static void free_ctx(void)
+{
+	struct quadd_ring_buffer *rb = &comm_ctx.rb;
+
+	rb_deinit(rb);
+}
+
+/* File operations of the profiler misc device. */
+static const struct file_operations qm_fops = {
+	.open		= device_open,
+	.release	= device_release,
+	.read		= device_read,
+	.unlocked_ioctl	= device_ioctl,
+};
+
+/*
+ * comm_init - set up comm_ctx and register the profiler misc device
+ *
+ * The mutex, the state fields and the ring buffer are prepared before
+ * misc_register(): the file operations take io_mutex and touch the ring
+ * buffer, and they can be invoked as soon as the device is registered.
+ * Every failure path releases what was acquired before it, including the
+ * miscdevice descriptor.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure or the error from
+ * misc_register().
+ */
+static int comm_init(void)
+{
+	int res;
+	struct miscdevice *misc_dev;
+	struct quadd_ring_buffer *rb = &comm_ctx.rb;
+
+	misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL);
+	if (!misc_dev) {
+		pr_err("Error: alloc error\n");
+		return -ENOMEM;
+	}
+
+	misc_dev->minor = MISC_DYNAMIC_MINOR;
+	misc_dev->name = QUADD_DEVICE_NAME;
+	misc_dev->fops = &qm_fops;
+
+	mutex_init(&comm_ctx.io_mutex);
+	atomic_set(&comm_ctx.active, 0);
+
+	comm_ctx.params_ok = 0;
+	comm_ctx.process_pid = 0;
+	comm_ctx.nr_users = 0;
+
+	if (rb_init(rb, QUADD_SIZE_RB_BUFFER)) {
+		res = -ENOMEM;
+		goto err_free_dev;
+	}
+
+	/* Register last: the device becomes reachable from userspace here. */
+	res = misc_register(misc_dev);
+	if (res < 0) {
+		pr_err("Error: misc_register %d\n", res);
+		goto err_free_rb;
+	}
+	comm_ctx.misc_dev = misc_dev;
+
+	return 0;
+
+err_free_rb:
+	free_ctx();
+err_free_dev:
+	kfree(misc_dev);
+	return res;
+}
+
+/*
+ * quadd_comm_events_init - bring up the userspace communication channel
+ * @control: callbacks used by the device to drive the profiler
+ *
+ * @control is stored before comm_init() registers the device, because the
+ * ioctl and release handlers call through comm_ctx.control and may run as
+ * soon as the device exists.
+ *
+ * Returns the interface used to queue samples, or NULL on failure.
+ */
+struct quadd_comm_data_interface *
+quadd_comm_events_init(struct quadd_comm_control_interface *control)
+{
+	comm_ctx.control = control;
+
+	if (comm_init() < 0) {
+		comm_ctx.control = NULL;
+		return NULL;
+	}
+
+	return &comm_data;
+}
+
+/* Tear down the device and the ring buffer while holding io_mutex. */
+void quadd_comm_events_exit(void)
+{
+	struct mutex *io_lock = &comm_ctx.io_mutex;
+
+	mutex_lock(io_lock);
+
+	unregister();
+	free_ctx();
+
+	mutex_unlock(io_lock);
+}
diff --git a/drivers/misc/tegra-profiler/comm.h b/drivers/misc/tegra-profiler/comm.h
new file mode 100644
index 000000000000..da8e918da38b
--- /dev/null
+++ b/drivers/misc/tegra-profiler/comm.h
@@ -0,0 +1,74 @@
+/*
+ * drivers/misc/tegra-profiler/comm.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_COMM_H__
+#define __QUADD_COMM_H__
+
+#include <linux/types.h>
+
+struct quadd_record_data;
+struct quadd_comm_cap;
+struct quadd_module_state;
+struct miscdevice;
+
+struct quadd_ring_buffer {
+	char *buf;		/* storage allocated in rb_init(), NULL after rb_deinit() */
+	spinlock_t lock;	/* taken with irqsave around accesses to the fields below */
+
+	size_t size;		/* capacity of buf in bytes */
+	size_t pos_read;	/* offset in buf of the next byte to read */
+	size_t pos_write;	/* offset in buf of the next byte to write */
+	size_t fill_count;	/* bytes written and not yet read */
+};
+
+struct quadd_parameters;
+
+struct quadd_comm_control_interface {
+	int (*start)(void);	/* called for IOCTL_START; non-zero is treated as failure */
+	void (*stop)(void);	/* called for IOCTL_STOP and when the last user closes the device */
+	int (*set_parameters)(struct quadd_parameters *param,
+			      uid_t *debug_app_uid);	/* called for IOCTL_SETUP; non-zero is returned to the caller */
+	void (*get_capabilities)(struct quadd_comm_cap *cap);	/* fills the reply of IOCTL_GET_CAP */
+	void (*get_state)(struct quadd_module_state *state);	/* fills the reply of IOCTL_GET_STATE */
+};
+
+struct quadd_comm_data_interface {
+	void (*put_sample)(struct quadd_record_data *data, char *extra_data,
+			   unsigned int extra_length);	/* queue a record plus optional payload for userspace */
+	void (*reset)(void);	/* drop everything queued so far */
+};
+
+struct quadd_comm_ctx {
+	struct quadd_comm_control_interface *control;	/* callbacks passed to quadd_comm_events_init() */
+	struct quadd_ring_buffer rb;	/* records waiting to be read by userspace */
+
+	atomic_t active;	/* 1 between a successful IOCTL_START and stop */
+
+	struct mutex io_mutex;	/* held by open/release/read/ioctl handlers */
+	int nr_users;	/* number of current opens of the device */
+
+	int params_ok;	/* set to 1 after a successful IOCTL_SETUP */
+	pid_t process_pid;	/* pids[0] of the last successful IOCTL_SETUP */
+	uid_t debug_app_uid;	/* written by control->set_parameters() */
+
+	struct miscdevice *misc_dev;	/* descriptor allocated and registered in comm_init() */
+};
+
+struct quadd_comm_data_interface *
+quadd_comm_events_init(struct quadd_comm_control_interface *control);
+void quadd_comm_events_exit(void);
+
+#endif	/* __QUADD_COMM_H__ */
diff --git a/drivers/misc/tegra-profiler/debug.c b/drivers/misc/tegra-profiler/debug.c
new file mode 100644
index 000000000000..e0270a310ae4
--- /dev/null
+++ b/drivers/misc/tegra-profiler/debug.c
@@ -0,0 +1,164 @@
+/*
+ * drivers/misc/tegra-profiler/debug.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <asm/irq_regs.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "debug.h"
+#include "hrt.h"
+#include "tegra.h"
+
+#ifdef QM_DEBUG_SAMPLES_ENABLE
+
+/*
+ * init_sample - fill the common part of a debug record
+ * @record: record to initialise
+ * @regs:   interrupted registers, or NULL to use get_irq_regs()
+ *
+ * The whole record is zeroed first: callers keep it on the stack and hand
+ * it to quadd_put_sample(), so bytes that are not explicitly set (padding,
+ * unused members) must not carry stale stack contents.
+ */
+static inline void
+init_sample(struct quadd_record_data *record, struct pt_regs *regs)
+{
+	struct quadd_debug_data *s = &record->debug;
+
+	memset(record, 0, sizeof(*record));
+
+	record->magic = QUADD_RECORD_MAGIC;
+	record->record_type = QUADD_RECORD_TYPE_DEBUG;
+
+	if (!regs)
+		regs = get_irq_regs();
+
+	if (!regs)
+		record->cpu_mode = QUADD_CPU_MODE_NONE;
+	else
+		record->cpu_mode = user_mode(regs) ?
+			QUADD_CPU_MODE_USER : QUADD_CPU_MODE_KERNEL;
+
+	s->cpu = quadd_get_processor_id();
+	s->pid = 0;
+	s->time = quadd_get_time();
+	s->timer_period = 0;
+
+	s->extra_value1 = 0;
+	s->extra_value2 = 0;
+	s->extra_value3 = 0;
+}
+
+/* Emit a debug record marking an invocation of the timer handler. */
+void qm_debug_handler_sample(struct pt_regs *regs)
+{
+	struct quadd_record_data record;
+
+	init_sample(&record, regs);
+	record.debug.type = QM_DEBUG_SAMPLE_TYPE_TIMER_HANDLE;
+
+	quadd_put_sample(&record, NULL, 0);
+}
+
+/* Emit a debug record for a timer forward; @period goes to timer_period. */
+void qm_debug_timer_forward(struct pt_regs *regs, u64 period)
+{
+	struct quadd_record_data record;
+
+	init_sample(&record, regs);
+	record.debug.type = QM_DEBUG_SAMPLE_TYPE_TIMER_FORWARD;
+	record.debug.timer_period = period;
+
+	quadd_put_sample(&record, NULL, 0);
+}
+
+/* Emit a debug record for a timer start; @period goes to timer_period. */
+void qm_debug_timer_start(struct pt_regs *regs, u64 period)
+{
+	struct quadd_record_data record;
+
+	init_sample(&record, regs);
+	record.debug.type = QM_DEBUG_SAMPLE_TYPE_TIMER_START;
+	record.debug.timer_period = period;
+
+	quadd_put_sample(&record, NULL, 0);
+}
+
+/* Emit a debug record for a timer cancellation. */
+void qm_debug_timer_cancel(void)
+{
+	struct quadd_record_data record;
+
+	init_sample(&record, NULL);
+	record.debug.type = QM_DEBUG_SAMPLE_TYPE_TIMER_CANCEL;
+
+	quadd_put_sample(&record, NULL, 0);
+}
+
+/*
+ * Emit a sched-in debug record; the three arguments are stored in
+ * extra_value1..3 in the order given.
+ */
+void
+qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, int prev_nr_active)
+{
+	struct quadd_record_data record;
+
+	init_sample(&record, NULL);
+	record.debug.type = QM_DEBUG_SAMPLE_TYPE_SCHED_IN;
+
+	record.debug.extra_value1 = prev_pid;
+	record.debug.extra_value2 = current_pid;
+	record.debug.extra_value3 = prev_nr_active;
+
+	quadd_put_sample(&record, NULL, 0);
+}
+
+/*
+ * Emit a read-counter debug record; event id, previous value and current
+ * value are stored in extra_value1..3.
+ */
+void qm_debug_read_counter(int event_id, u32 prev_val, u32 val)
+{
+	struct quadd_record_data record;
+
+	init_sample(&record, NULL);
+	record.debug.type = QM_DEBUG_SAMPLE_TYPE_READ_COUNTER;
+
+	record.debug.extra_value1 = event_id;
+	record.debug.extra_value2 = prev_val;
+	record.debug.extra_value3 = val;
+
+	quadd_put_sample(&record, NULL, 0);
+}
+
+/* Emit a source-start debug record; @source_type goes to extra_value1. */
+void qm_debug_start_source(int source_type)
+{
+	struct quadd_record_data record;
+
+	init_sample(&record, NULL);
+	record.debug.type = QM_DEBUG_SAMPLE_TYPE_SOURCE_START;
+	record.debug.extra_value1 = source_type;
+
+	quadd_put_sample(&record, NULL, 0);
+}
+
+/* Emit a source-stop debug record; @source_type goes to extra_value1. */
+void qm_debug_stop_source(int source_type)
+{
+	struct quadd_record_data record;
+
+	init_sample(&record, NULL);
+	record.debug.type = QM_DEBUG_SAMPLE_TYPE_SOURCE_STOP;
+	record.debug.extra_value1 = source_type;
+
+	quadd_put_sample(&record, NULL, 0);
+}
+
+#endif	/* QM_DEBUG_SAMPLES_ENABLE */
diff --git a/drivers/misc/tegra-profiler/debug.h b/drivers/misc/tegra-profiler/debug.h
new file mode 100644
index 000000000000..ff62919eb243
--- /dev/null
+++ b/drivers/misc/tegra-profiler/debug.h
@@ -0,0 +1,87 @@
+/*
+ * drivers/misc/tegra-profiler/debug.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_DEBUG_H
+#define __QUADD_DEBUG_H
+
+#include <linux/tegra_profiler.h>
+
+/* #define QM_DEBUG_SAMPLES_ENABLE 1 */
+
+#ifdef QM_DEBUG_SAMPLES_ENABLE	/* real emitters; this macro also guards their definitions in debug.c */
+void qm_debug_handler_sample(struct pt_regs *regs);
+void qm_debug_timer_forward(struct pt_regs *regs, u64 period);
+void qm_debug_timer_start(struct pt_regs *regs, u64 period);
+void qm_debug_timer_cancel(void);
+void qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid,
+			    int prev_nr_active);
+void qm_debug_read_counter(int event_id, u32 prev_val, u32 val);
+void qm_debug_start_source(int source_type);
+void qm_debug_stop_source(int source_type);
+#else	/* !QM_DEBUG_SAMPLES_ENABLE: empty inline stubs so call sites need no #ifdef */
+static inline void qm_debug_handler_sample(struct pt_regs *regs)
+{
+}
+static inline void qm_debug_timer_forward(struct pt_regs *regs, u64 period)
+{
+}
+static inline void qm_debug_timer_start(struct pt_regs *regs, u64 period)
+{
+}
+static inline void qm_debug_timer_cancel(void)
+{
+}
+static inline void
+qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, int prev_nr_active)
+{
+}
+static inline void qm_debug_read_counter(int event_id, u32 prev_val, u32 val)
+{
+}
+static inline void qm_debug_start_source(int source_type)
+{
+}
+static inline void qm_debug_stop_source(int source_type)
+{
+}
+#endif	/* QM_DEBUG_SAMPLES_ENABLE */
+
+void quadd_test_delay(void);
+
+#define QM_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+/*
+ * quadd_get_event_str - printable name of a QUADD_EVENT_TYPE_* value
+ * @event: event type
+ *
+ * Returns the name of @event, or "invalid event" when @event is negative,
+ * beyond the table, or an index the table has no entry for.  The table is
+ * built with designated initializers, so any index that is not listed
+ * would otherwise yield a NULL pointer.
+ */
+static inline char *
+quadd_get_event_str(int event)
+{
+	static char *str[] = {
+		[QUADD_EVENT_TYPE_CPU_CYCLES]		= "cpu-cycles",
+
+		[QUADD_EVENT_TYPE_INSTRUCTIONS]		= "instructions",
+		[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS]	= "branch_instruction",
+		[QUADD_EVENT_TYPE_BRANCH_MISSES]	= "branch_misses",
+		[QUADD_EVENT_TYPE_BUS_CYCLES]		= "bus-cycles",
+
+		[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES]	= "l1_d_read",
+		[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES]	= "l1_d_write",
+		[QUADD_EVENT_TYPE_L1_ICACHE_MISSES]		= "l1_i",
+
+		[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES]	= "l2_d_read",
+		[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES]	= "l2_d_write",
+		[QUADD_EVENT_TYPE_L2_ICACHE_MISSES]		= "l2_i",
+	};
+
+	if (event < 0 || event >= (int)QM_ARRAY_SIZE(str) || !str[event])
+		return "invalid event";
+
+	return str[event];
+}
+
+#endif	/* __QUADD_DEBUG_H */
diff --git a/drivers/misc/tegra-profiler/hrt.c b/drivers/misc/tegra-profiler/hrt.c
new file mode 100644
index 000000000000..56d8b84ae75d
--- /dev/null
+++ b/drivers/misc/tegra-profiler/hrt.c
@@ -0,0 +1,620 @@
+/*
+ * drivers/misc/tegra-profiler/hrt.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/sched.h>
+#include <asm/cputype.h>
+#include <linux/hrtimer.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/ratelimit.h>
+#include <asm/irq_regs.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "quadd.h"
+#include "hrt.h"
+#include "comm.h"
+#include "mmap.h"
+#include "ma.h"
+#include "power_clk.h"
+#include "tegra.h"
+#include "debug.h"
+
+static struct quadd_hrt_ctx hrt;
+
+static void read_all_sources(struct pt_regs *regs, pid_t pid);
+
+static void sample_time_prepare(void);
+static void sample_time_finish(void);
+static void sample_time_reset(struct quadd_cpu_context *cpu_ctx);
+
+/*
+ * Sampling timer callback.
+ *
+ * Stops re-arming as soon as profiling is no longer active.  Otherwise it
+ * takes one sample from every active event source (only when interrupted
+ * registers are available) and pushes the expiry forward by one sample
+ * period.
+ */
+static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer)
+{
+	struct pt_regs *irq_regs = get_irq_regs();
+
+	if (!hrt.active)
+		return HRTIMER_NORESTART;
+
+	qm_debug_handler_sample(irq_regs);
+
+	if (irq_regs != NULL) {
+		sample_time_prepare();
+		read_all_sources(irq_regs, -1);
+		sample_time_finish();
+	}
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(hrt.sample_period));
+	qm_debug_timer_forward(irq_regs, hrt.sample_period);
+
+	return HRTIMER_RESTART;
+}
+
+/*
+ * Arm the sampling timer embedded in @cpu_ctx so that it first fires one
+ * sample period from now.  The per-CPU sample timestamps are reset first so
+ * that the next sample starts a fresh time base.
+ */
+static void start_hrtimer(struct quadd_cpu_context *cpu_ctx)
+{
+	u64 period = hrt.sample_period;
+
+	sample_time_reset(cpu_ctx);
+
+	hrtimer_start(&cpu_ctx->hrtimer, ns_to_ktime(period),
+		      HRTIMER_MODE_REL_PINNED);
+	qm_debug_timer_start(NULL, period);
+}
+
+/* Cancel the sampling timer embedded in @cpu_ctx and notify the debug hook. */
+static void cancel_hrtimer(struct quadd_cpu_context *cpu_ctx)
+{
+	hrtimer_cancel(&cpu_ctx->hrtimer);
+	qm_debug_timer_cancel();
+}
+
+/*
+ * One-time setup of the sampling timer embedded in @cpu_ctx: reset the
+ * sample timestamps, initialize the timer on CLOCK_MONOTONIC and install
+ * hrtimer_handler() as its callback.  The timer is not started here.
+ */
+static void init_hrtimer(struct quadd_cpu_context *cpu_ctx)
+{
+	sample_time_reset(cpu_ctx);
+
+	hrtimer_init(&cpu_ctx->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	cpu_ctx->hrtimer.function = hrtimer_handler;
+}
+
+/* Return the current monotonic clock reading converted to nanoseconds. */
+u64 quadd_get_time(void)
+{
+	struct timespec now;
+
+	do_posix_clock_monotonic_gettime(&now);
+
+	return timespec_to_ns(&now);
+}
+
+/*
+ * Timestamp to store in a sample.  Without QUADD_USE_CORRECT_SAMPLE_TS this
+ * is simply the current monotonic time; with it, the value prepared for this
+ * CPU by sample_time_prepare() is returned instead.
+ */
+static u64 get_sample_time(void)
+{
+#ifndef QUADD_USE_CORRECT_SAMPLE_TS
+	return quadd_get_time();
+#else
+	struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+	return cpu_ctx->current_time;
+#endif
+}
+
+/*
+ * Compute this CPU's timestamp for the sample about to be taken (only with
+ * QUADD_USE_CORRECT_SAMPLE_TS).  The first sample after a reset uses the
+ * real clock; later ones advance the previous timestamp by exactly one
+ * sample period.
+ */
+static void sample_time_prepare(void)
+{
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+	struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+
+	/* ULLONG_MAX is the "no previous sample" marker set by the reset */
+	if (cpu_ctx->prev_time == ULLONG_MAX)
+		cpu_ctx->current_time = quadd_get_time();
+	else
+		cpu_ctx->current_time = cpu_ctx->prev_time + hrt.sample_period;
+#endif
+}
+
+/*
+ * Remember the timestamp just used on this CPU as the base for the next
+ * sample (only with QUADD_USE_CORRECT_SAMPLE_TS).
+ */
+static void sample_time_finish(void)
+{
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+	struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+	cpu_ctx->prev_time = cpu_ctx->current_time;
+#endif
+}
+
+/*
+ * Mark both timestamps of @cpu_ctx as "unset" (ULLONG_MAX) so that the next
+ * sample_time_prepare() starts from the real clock.  Compiles to nothing
+ * without QUADD_USE_CORRECT_SAMPLE_TS.
+ */
+static void sample_time_reset(struct quadd_cpu_context *cpu_ctx)
+{
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+	cpu_ctx->prev_time = ULLONG_MAX;
+	cpu_ctx->current_time = ULLONG_MAX;
+#endif
+}
+
+/*
+ * Emit the header record that describes the profiling session being
+ * started: format version, selected options, sampling and memory-analysis
+ * periods, and the power/clock reporting state.
+ */
+static void put_header(void)
+{
+	int power_rate_period = 0;
+	struct quadd_record_data record;
+	struct quadd_header_data *hdr = &record.hdr;
+	struct quadd_parameters *param = &hrt.quadd_ctx->param;
+	struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
+
+	/*
+	 * The record lives on the stack and is handed to the comm layer as a
+	 * whole.  Clear it first so that members and padding not written
+	 * below can never carry stale stack contents out of this function.
+	 */
+	memset(&record, 0, sizeof(record));
+
+	record.magic = QUADD_RECORD_MAGIC;
+	record.record_type = QUADD_RECORD_TYPE_HEADER;
+	record.cpu_mode = QUADD_CPU_MODE_NONE;
+
+	hdr->version = QUADD_SAMPLES_VERSION;
+
+	hdr->backtrace = param->backtrace;
+	hdr->use_freq = param->use_freq;
+	hdr->system_wide = param->system_wide;
+
+	/* TODO: dynamically */
+#ifdef QM_DEBUG_SAMPLES_ENABLE
+	hdr->debug_samples = 1;
+#else
+	hdr->debug_samples = 0;
+#endif
+
+	hdr->period = hrt.sample_period;
+	hdr->ma_period = hrt.ma_period;
+
+	/*
+	 * power_rate_period is pre-set to 0 above in case the callee leaves
+	 * its output argument untouched when reporting is disabled.
+	 */
+	hdr->power_rate = quadd_power_clk_is_enabled(&power_rate_period);
+	hdr->power_rate_period = power_rate_period;
+
+	comm->put_sample(&record, NULL, 0);
+}
+
+/*
+ * Forward one record, plus optional trailing data, to the comm layer and
+ * count it.  Sample records whose period exceeds 0x7FFFFFFF are treated as
+ * bogus: they are dropped, with an error logged once.
+ */
+void quadd_put_sample(struct quadd_record_data *data,
+		      char *extra_data, unsigned int extra_length)
+{
+	struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
+	int is_sample = (data->record_type == QUADD_RECORD_TYPE_SAMPLE);
+
+	if (is_sample && data->sample.period > 0x7FFFFFFF) {
+		pr_err_once("very big period, sample id: %d\n",
+			    data->sample.event_id);
+		return;
+	}
+
+	comm->put_sample(data, extra_data, extra_length);
+	atomic64_inc(&hrt.counter_samples);
+}
+
+/*
+ * Fill @sample from one counter reading.
+ * @event:  counter reading (previous and current raw values, id, source).
+ * @regs:   interrupted register state; supplies the instruction pointer.
+ * @sample: output record.
+ *
+ * The period is the number of events counted since the previous reading.
+ * For PL310 events it is divided by the number of cores that currently have
+ * an active profiled thread.
+ *
+ * Always returns 0.
+ */
+static int get_sample_data(struct event_data *event,
+			   struct pt_regs *regs,
+			   struct quadd_sample_data *sample)
+{
+	u32 period;
+	u32 prev_val, val;
+
+	prev_val = event->prev_val;
+	val = event->val;
+
+	sample->event_id = event->event_id;
+
+	sample->ip = instruction_pointer(regs);
+	sample->cpu = quadd_get_processor_id();
+	sample->time = get_sample_time();
+
+	/*
+	 * u32 subtraction is performed modulo 2^32, so it yields the right
+	 * delta whether or not the counter wrapped.  The former explicit
+	 * wrap branch (QUADD_U32_MAX - prev_val + val) was one count short.
+	 */
+	period = val - prev_val;
+
+	if (event->event_source == QUADD_EVENT_SOURCE_PL310) {
+		int nr_current_active = atomic_read(&hrt.nr_active_all_core);
+		if (nr_current_active > 1)
+			period = period / nr_current_active;
+	}
+
+	sample->period = period;
+	return 0;
+}
+
+/*
+ * Thin wrapper around quadd_get_mmap() that supplies the calling CPU's
+ * context; the result and *extra_length come straight from that helper.
+ */
+static char *get_mmap_data(struct pt_regs *regs,
+			   struct quadd_mmap_data *sample,
+			   unsigned int *extra_length)
+{
+	return quadd_get_mmap(this_cpu_ptr(hrt.cpu_ctx), regs, sample,
+			      extra_length);
+}
+
+/*
+ * Read all counters of one event source and emit a sample record for each.
+ * @source: event source to read; NULL is tolerated and ignored.
+ * @regs:   interrupted register state.
+ * @pid:    pid to attribute the samples to.  When not positive, the thread
+ *          currently tracked on this CPU is used, and nothing is emitted if
+ *          no profiled thread is active here.
+ *
+ * When the interrupted context is user mode and backtraces are enabled, the
+ * user call chain is collected once and attached to every sample.
+ */
+static void read_source(struct quadd_event_source_interface *source,
+			struct pt_regs *regs, pid_t pid)
+{
+	int count, idx;
+	struct event_data events[QUADD_MAX_COUNTERS];
+	struct quadd_record_data record_data;
+	char *extra_data = NULL;
+	unsigned int extra_length = 0, callchain_nr = 0;
+	struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+	struct quadd_callchain *callchain_data = &cpu_ctx->callchain_data;
+	struct quadd_thread_data *active = &cpu_ctx->active_thread;
+
+	if (source == NULL)
+		return;
+
+	count = source->read(events);
+	if (count == 0 || count > QUADD_MAX_COUNTERS) {
+		pr_err_once("Error number of counters: %d, source: %p\n",
+				count, source);
+		return;
+	}
+
+	if (user_mode(regs) && hrt.quadd_ctx->param.backtrace) {
+		callchain_nr = quadd_get_user_callchain(regs, callchain_data);
+		if (callchain_nr > 0) {
+			extra_data = (char *)callchain_data->callchain;
+			extra_length = callchain_nr * sizeof(u32);
+		}
+	}
+
+	for (idx = 0; idx < count; idx++) {
+		if (get_sample_data(&events[idx], regs, &record_data.sample))
+			return;
+
+		record_data.magic = QUADD_RECORD_MAGIC;
+		record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
+		if (user_mode(regs))
+			record_data.cpu_mode = QUADD_CPU_MODE_USER;
+		else
+			record_data.cpu_mode = QUADD_CPU_MODE_KERNEL;
+
+		record_data.sample.callchain_nr = callchain_nr;
+
+		/* choose the pid, or skip when nobody is being profiled */
+		if (pid > 0)
+			record_data.sample.pid = pid;
+		else if (atomic_read(&cpu_ctx->nr_active) > 0)
+			record_data.sample.pid = active->pid;
+		else
+			continue;
+
+		quadd_put_sample(&record_data, extra_data, extra_length);
+	}
+}
+
+/*
+ * Take one sample from every active event source.
+ * @regs: interrupted register state; nothing happens when it is NULL.
+ * @pid:  pid to attribute the data to, or a non-positive value to fall back
+ *        to the first configured pid (mmap record) or to the thread tracked
+ *        on this CPU (counter samples).
+ *
+ * If the mmap helper reports a file name for the interrupted address, an
+ * mmap record is emitted before the counter samples.
+ */
+static void read_all_sources(struct pt_regs *regs, pid_t pid)
+{
+	struct quadd_record_data record_data;
+	struct quadd_ctx *ctx = hrt.quadd_ctx;
+	unsigned int name_len;
+	char *name;
+
+	if (regs == NULL)
+		return;
+
+	name = get_mmap_data(regs, &record_data.mmap, &name_len);
+	if (name == NULL || name_len == 0) {
+		record_data.mmap.filename_length = 0;
+	} else {
+		record_data.magic = QUADD_RECORD_MAGIC;
+		record_data.record_type = QUADD_RECORD_TYPE_MMAP;
+		record_data.cpu_mode = QUADD_CPU_MODE_USER;
+
+		record_data.mmap.filename_length = name_len;
+		if (pid > 0)
+			record_data.mmap.pid = pid;
+		else
+			record_data.mmap.pid = ctx->param.pids[0];
+
+		quadd_put_sample(&record_data, name, name_len);
+	}
+
+	if (ctx->pmu && ctx->pmu_info.active)
+		read_source(ctx->pmu, regs, pid);
+
+	if (ctx->pl310 && ctx->pl310_info.active)
+		read_source(ctx->pl310, regs, pid);
+}
+
+/* Return 1 if @pid is one of the configured profiled pids, otherwise 0. */
+static inline int is_profile_process(pid_t pid)
+{
+	struct quadd_parameters *param = &hrt.quadd_ctx->param;
+	pid_t candidate;
+	int idx = 0;
+
+	while (idx < param->nr_pids) {
+		candidate = param->pids[idx];
+		if (candidate == pid)
+			return 1;
+		idx++;
+	}
+
+	return 0;
+}
+
+/*
+ * Record @pid/@tgid as the profiled thread running on this CPU.
+ *
+ * Each CPU context has a single slot.  If the slot already holds a thread,
+ * or the CPU's active counter is non-zero, nothing is changed and a warning
+ * is logged once.
+ *
+ * Returns 1 when the thread was recorded, 0 otherwise.
+ */
+static int
+add_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid, pid_t tgid)
+{
+	struct quadd_thread_data *slot = &cpu_ctx->active_thread;
+	int busy = (slot->pid > 0) || (atomic_read(&cpu_ctx->nr_active) > 0);
+
+	if (busy) {
+		pr_warn_once("Warning for thread: %d\n", (int)pid);
+		return 0;
+	}
+
+	slot->tgid = tgid;
+	slot->pid = pid;
+
+	return 1;
+}
+
+/*
+ * Clear this CPU's active-thread slot if it currently holds @pid.
+ *
+ * Returns 1 when the slot was cleared.  Returns 0 when the slot was already
+ * empty, or when it holds a different thread (logged once as a warning).
+ */
+static int remove_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid)
+{
+	struct quadd_thread_data *slot = &cpu_ctx->active_thread;
+
+	if (slot->pid < 0)
+		return 0;
+
+	if (slot->pid != pid) {
+		pr_warn_once("Warning for thread: %d\n", (int)pid);
+		return 0;
+	}
+
+	slot->pid = -1;
+	slot->tgid = -1;
+
+	return 1;
+}
+
+/*
+ * kprobe pre-handler installed on QUADD_HRT_SCHED_IN_FUNC; tracks profiled
+ * threads being switched out of and in to this CPU.
+ * @kp:   the kprobe that fired (unused).
+ * @regs: register state at the probe; r1 is read as the previous task
+ *        (presumably the probed function's second argument - confirm
+ *        against the symbol behind QUADD_HRT_SCHED_IN_FUNC).
+ *
+ * When the first profiled thread arrives on an idle CPU, the PMU is started,
+ * the PL310 counters are read once and the sampling timer is armed.  When
+ * the last one leaves, the timer is cancelled and the PMU stopped.
+ *
+ * Always returns 0.
+ */
+static int task_sched_in(struct kprobe *kp, struct pt_regs *regs)
+{
+	int n, prev_flag, current_flag;
+	struct task_struct *prev, *task;
+	int prev_nr_active, new_nr_active;
+	struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+	struct quadd_ctx *ctx = hrt.quadd_ctx;
+	struct event_data events[QUADD_MAX_COUNTERS];
+
+	if (hrt.active == 0)
+		return 0;
+
+	prev = (struct task_struct *)regs->ARM_r1;
+	task = current;
+
+	/* sanity-check the pointer recovered from the register file */
+	if (!prev || !prev->real_parent || !prev->group_leader ||
+		prev->group_leader->tgid != prev->tgid) {
+		pr_err_once("Warning\n");
+		return 0;
+	}
+
+	prev_flag = is_profile_process(prev->tgid);
+	current_flag = is_profile_process(task->tgid);
+
+	if (prev_flag || current_flag) {
+		prev_nr_active = atomic_read(&cpu_ctx->nr_active);
+		qm_debug_task_sched_in(prev->pid, task->pid, prev_nr_active);
+
+		if (prev_flag) {
+			n = remove_active_thread(cpu_ctx, prev->pid);
+			atomic_sub(n, &cpu_ctx->nr_active);
+		}
+		if (current_flag) {
+			/*
+			 * Count the thread only if it really took the slot.
+			 * Incrementing unconditionally let nr_active drift
+			 * away from the single tracked thread whenever
+			 * add_active_thread() refused the request.
+			 */
+			n = add_active_thread(cpu_ctx, task->pid, task->tgid);
+			atomic_add(n, &cpu_ctx->nr_active);
+		}
+
+		new_nr_active = atomic_read(&cpu_ctx->nr_active);
+		if (prev_nr_active != new_nr_active) {
+			if (prev_nr_active == 0) {
+				if (ctx->pmu)
+					ctx->pmu->start();
+
+				/* result unused; the read is done for its effect on the source */
+				if (ctx->pl310)
+					ctx->pl310->read(events);
+
+				start_hrtimer(cpu_ctx);
+				atomic_inc(&hrt.nr_active_all_core);
+			} else if (new_nr_active == 0) {
+				cancel_hrtimer(cpu_ctx);
+				atomic_dec(&hrt.nr_active_all_core);
+
+				if (ctx->pmu)
+					ctx->pmu->stop();
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * kprobe fault handler: log (once) which probe faulted and return 0, i.e.
+ * the fault is not handled here.
+ */
+static int handler_fault(struct kprobe *kp, struct pt_regs *regs, int trapnr)
+{
+	pr_err_once("addr: %p, symbol: %s\n", kp->addr, kp->symbol_name);
+	return 0;
+}
+
+/*
+ * Register the kprobe on QUADD_HRT_SCHED_IN_FUNC with task_sched_in() as
+ * its pre-handler.
+ *
+ * Returns 0 on success or the error code from register_kprobe().
+ */
+static int start_instr(void)
+{
+	int ret;
+	struct kprobe *kp = &hrt.kp_in;
+
+	memset(kp, 0, sizeof(*kp));
+
+	/* resolve the probe by symbol name, not by address */
+	kp->symbol_name = QUADD_HRT_SCHED_IN_FUNC;
+	kp->addr = NULL;
+	kp->pre_handler = task_sched_in;
+	kp->fault_handler = handler_fault;
+
+	ret = register_kprobe(kp);
+	if (!ret)
+		return 0;
+
+	pr_err("register_kprobe error, symbol_name: %s\n",
+		kp->symbol_name);
+	return ret;
+}
+
+/* Unregister the kprobe installed by start_instr(). */
+static void stop_instr(void)
+{
+	unregister_kprobe(&hrt.kp_in);
+}
+
+/*
+ * Init-time check that the kprobe can be installed: register it once and
+ * remove it again straight away.
+ *
+ * Returns 0 on success or the error from start_instr().
+ */
+static int init_instr(void)
+{
+	int ret = start_instr();
+
+	if (ret) {
+		pr_err("Init instr failed\n");
+		return ret;
+	}
+
+	stop_instr();
+
+	return 0;
+}
+
+/* Counterpart of init_instr(); there is nothing to release, so return 0. */
+static int deinit_instr(void)
+{
+	return 0;
+}
+
+/*
+ * Return every CPU's context to the idle state: no active thread, zero
+ * active count and unset sample timestamps.
+ */
+static void reset_cpu_ctx(void)
+{
+	int cpu_id;
+	struct quadd_cpu_context *cpu_ctx;
+	struct quadd_thread_data *t_data;
+
+	/*
+	 * Per-CPU storage exists for possible CPUs only, so walk the
+	 * possible mask instead of assuming the ids 0..nr_cpu_ids-1 are all
+	 * populated.
+	 */
+	for_each_possible_cpu(cpu_id) {
+		cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
+		t_data = &cpu_ctx->active_thread;
+
+		atomic_set(&cpu_ctx->nr_active, 0);
+
+		t_data->pid = -1;
+		t_data->tgid = -1;
+
+		sample_time_reset(cpu_ctx);
+	}
+}
+
+/*
+ * Start a profiling session.
+ *
+ * Derives the sample period (ns) and the memory-analysis period (ms) from
+ * the configured frequencies, clears all bookkeeping, installs the
+ * scheduler kprobe, emits the stream header, starts the PL310 source and
+ * the memory-analysis timer, and finally marks the module active.
+ *
+ * Returns 0 on success or the error from start_instr().
+ */
+int quadd_hrt_start(void)
+{
+	int err;
+	u64 period;
+	long freq;
+	struct quadd_ctx *ctx = hrt.quadd_ctx;
+
+	/* never sample slower than QUADD_HRT_MIN_FREQ */
+	freq = ctx->param.freq;
+	freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
+	period = NSEC_PER_SEC / freq;
+	hrt.sample_period = period;
+
+	if (ctx->param.ma_freq > 0)
+		hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
+	else
+		hrt.ma_period = 0;
+
+	atomic64_set(&hrt.counter_samples, 0);
+
+	/*
+	 * The per-CPU active counts are cleared by reset_cpu_ctx(), so the
+	 * global count must restart from zero as well.  A session stopped
+	 * while threads were still active would otherwise leave a stale
+	 * value that skews the PL310 period division.
+	 */
+	atomic_set(&hrt.nr_active_all_core, 0);
+
+	reset_cpu_ctx();
+
+	err = start_instr();
+	if (err) {
+		pr_err("error: start_instr is failed\n");
+		return err;
+	}
+
+	put_header();
+
+	if (ctx->pl310)
+		ctx->pl310->start();
+
+	quadd_ma_start(&hrt);
+
+	hrt.active = 1;
+
+	pr_info("Start hrt: freq/period: %ld/%llu\n", freq, period);
+	return 0;
+}
+
+/*
+ * Stop the current profiling session: stop the PL310 source and the
+ * memory-analysis timer, mark the module inactive, remove the scheduler
+ * kprobe and clear the sample counter.
+ *
+ * NOTE(review): the per-CPU sampling timers are not cancelled here; they
+ * stop re-arming once they see hrt.active == 0 in hrtimer_handler().
+ */
+void quadd_hrt_stop(void)
+{
+	struct quadd_ctx *ctx = hrt.quadd_ctx;
+
+	pr_info("Stop hrt, number of samples: %llu\n",
+		atomic64_read(&hrt.counter_samples));
+
+	if (ctx->pl310)
+		ctx->pl310->stop();
+
+	quadd_ma_stop(&hrt);
+
+	hrt.active = 0;
+	stop_instr();
+
+	atomic64_set(&hrt.counter_samples, 0);
+
+	/* reset_cpu_ctx(); */
+}
+
+/*
+ * Tear down the sampling core: stop a running session, make sure no
+ * sampling timer is left queued or running, then release the per-CPU
+ * contexts.
+ */
+void quadd_hrt_deinit(void)
+{
+	int cpu_id;
+
+	if (hrt.active)
+		quadd_hrt_stop();
+
+	/*
+	 * quadd_hrt_stop() only clears hrt.active; a timer armed earlier may
+	 * still be pending and would run on freed memory.  Cancel every
+	 * per-CPU timer (this also waits for a running callback) before the
+	 * storage that embeds it is released.
+	 */
+	if (hrt.cpu_ctx) {
+		for_each_possible_cpu(cpu_id)
+			hrtimer_cancel(&per_cpu_ptr(hrt.cpu_ctx,
+						    cpu_id)->hrtimer);
+	}
+
+	deinit_instr();
+	free_percpu(hrt.cpu_ctx);
+}
+
+/*
+ * Fill @state with sampling statistics: the number of samples emitted so
+ * far, and a skipped-sample count that is always reported as 0.
+ */
+void quadd_hrt_get_state(struct quadd_module_state *state)
+{
+	state->nr_all_samples = atomic64_read(&hrt.counter_samples);
+	state->nr_skipped_samples = 0;
+}
+
+/*
+ * One-time initialization of the sampling core.
+ * @ctx: module context; its parameters supply the initial frequencies.
+ *
+ * Allocates and initializes the per-CPU contexts, including their sampling
+ * timers, and verifies that the scheduler kprobe can be installed.
+ *
+ * Returns the (static) hrt context, or NULL on failure.  Nothing stays
+ * allocated on failure.
+ */
+struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx)
+{
+	int cpu_id;
+	u64 period;
+	long freq;
+	struct quadd_cpu_context *cpu_ctx;
+
+	hrt.quadd_ctx = ctx;
+	hrt.active = 0;
+
+	/* never sample slower than QUADD_HRT_MIN_FREQ */
+	freq = ctx->param.freq;
+	freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
+	period = NSEC_PER_SEC / freq;
+	hrt.sample_period = period;
+
+	if (ctx->param.ma_freq > 0)
+		hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
+	else
+		hrt.ma_period = 0;
+
+	atomic64_set(&hrt.counter_samples, 0);
+	atomic_set(&hrt.nr_active_all_core, 0);
+
+	hrt.cpu_ctx = alloc_percpu(struct quadd_cpu_context);
+	if (!hrt.cpu_ctx)
+		return NULL;
+
+	/* per-CPU storage exists for possible CPUs only */
+	for_each_possible_cpu(cpu_id) {
+		cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
+
+		atomic_set(&cpu_ctx->nr_active, 0);
+
+		cpu_ctx->active_thread.pid = -1;
+		cpu_ctx->active_thread.tgid = -1;
+
+		init_hrtimer(cpu_ctx);
+	}
+
+	if (init_instr()) {
+		/* do not leak the per-CPU contexts on the error path */
+		free_percpu(hrt.cpu_ctx);
+		hrt.cpu_ctx = NULL;
+		return NULL;
+	}
+
+	return &hrt;
+}
diff --git a/drivers/misc/tegra-profiler/hrt.h b/drivers/misc/tegra-profiler/hrt.h
new file mode 100644
index 000000000000..f113b8846a02
--- /dev/null
+++ b/drivers/misc/tegra-profiler/hrt.h
@@ -0,0 +1,94 @@
+/*
+ * drivers/misc/tegra-profiler/hrt.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_HRT_H
+#define __QUADD_HRT_H
+
+#define QUADD_MAX_STACK_DEPTH		64
+
+#ifdef __KERNEL__
+
+#include <linux/hrtimer.h>
+#include <linux/limits.h>
+#include <linux/kprobes.h>
+
+#include "backtrace.h"
+
+#define QUADD_USE_CORRECT_SAMPLE_TS	1
+
+/* Identity of a thread; -1 in both fields marks an empty slot in hrt.c. */
+struct quadd_thread_data {
+	pid_t pid;	/* thread id */
+	pid_t tgid;	/* thread-group id */
+};
+
+/* Per-CPU sampling state; one instance is allocated for each CPU. */
+struct quadd_cpu_context {
+	/* sampling timer for this CPU */
+	struct hrtimer hrtimer;
+
+	/* buffer for the user call chain collected while sampling */
+	struct quadd_callchain callchain_data;
+	/* presumably scratch space for mmap file names - confirm in mmap.c */
+	char mmap_filename[PATH_MAX];
+
+	/* profiled thread currently tracked on this CPU */
+	struct quadd_thread_data active_thread;
+	/* number of profiled threads counted as active on this CPU */
+	atomic_t nr_active;
+
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+	/* timestamps of the previous/current sample; ULLONG_MAX when unset */
+	u64 prev_time;
+	u64 current_time;
+#endif
+};
+
+/* Global state of the sampling core (a single static instance in hrt.c). */
+struct quadd_hrt_ctx {
+	/*
+	 * Per-CPU contexts.  __percpu qualifies the pointed-to object, so it
+	 * belongs before the '*'; only then can sparse check accesses.
+	 */
+	struct quadd_cpu_context __percpu *cpu_ctx;
+	/* sampling period in nanoseconds */
+	u64 sample_period;
+
+	/* kprobe used to observe context switches */
+	struct kprobe kp_in;
+
+	/* back pointer to the module context */
+	struct quadd_ctx *quadd_ctx;
+
+	/* non-zero while a profiling session is running */
+	int active;
+	/* number of records emitted through quadd_put_sample() */
+	atomic64_t counter_samples;
+	/* number of CPUs that currently run a profiled thread */
+	atomic_t nr_active_all_core;
+
+	/* memory-analysis timer and its period in milliseconds (0 = off) */
+	struct timer_list ma_timer;
+	unsigned int ma_period;
+
+	/* last reported virtual/resident sizes, in pages */
+	unsigned long vm_size_prev;
+	unsigned long rss_size_prev;
+};
+
+#define QUADD_HRT_MIN_FREQ	110
+
+#define QUADD_U32_MAX (~(__u32)0)
+
+struct quadd_hrt_ctx;
+struct quadd_record_data;
+struct quadd_module_state;
+
+struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx);
+void quadd_hrt_deinit(void);
+
+int quadd_hrt_start(void);
+void quadd_hrt_stop(void);
+
+void quadd_put_sample(struct quadd_record_data *data,
+		      char *extra_data, unsigned int extra_length);
+
+void quadd_hrt_get_state(struct quadd_module_state *state);
+u64 quadd_get_time(void);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __QUADD_HRT_H */
diff --git a/drivers/misc/tegra-profiler/ma.c b/drivers/misc/tegra-profiler/ma.c
new file mode 100644
index 000000000000..2021954ace58
--- /dev/null
+++ b/drivers/misc/tegra-profiler/ma.c
@@ -0,0 +1,132 @@
+/*
+ * drivers/misc/tegra-profiler/ma.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/timer.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "ma.h"
+#include "quadd.h"
+#include "hrt.h"
+#include "comm.h"
+#include "debug.h"
+
+/*
+ * Emit one memory-analysis record for @pid.
+ * @vm_size and @rss_size are given in pages and are reported in bytes.
+ */
+static void make_sample(struct quadd_hrt_ctx *hrt_ctx,
+			pid_t pid, unsigned long vm_size,
+			unsigned long rss_size)
+{
+	struct quadd_record_data record;
+	struct quadd_ma_data *ma = &record.ma;
+
+	record.magic = QUADD_RECORD_MAGIC;
+	record.record_type = QUADD_RECORD_TYPE_MA;
+	record.cpu_mode = QUADD_CPU_MODE_NONE;
+
+	ma->pid = pid;
+	ma->time = quadd_get_time();
+
+	/* pages -> bytes */
+	ma->vm_size = vm_size << PAGE_SHIFT;
+	ma->rss_size = rss_size << PAGE_SHIFT;
+
+	hrt_ctx->quadd_ctx->comm->put_sample(&record, NULL, 0);
+}
+
+/*
+ * Sample the memory footprint of the first profiled process and emit a
+ * record if its virtual or resident size changed since the last report.
+ *
+ * The task is looked up by pid on every call; nothing is emitted when the
+ * process no longer exists or has no address space.
+ */
+static void check_ma(struct quadd_hrt_ctx *hrt_ctx)
+{
+	pid_t pid;
+	struct pid *pid_s;
+	struct task_struct *task = NULL;
+	struct mm_struct *mm;
+	struct quadd_ctx *quadd_ctx = hrt_ctx->quadd_ctx;
+	unsigned long vm_size, rss_size;
+
+	pid = quadd_ctx->param.pids[0];
+
+	/*
+	 * The task returned by pid_task() is only guaranteed to stay around
+	 * inside the RCU read-side section and no reference is taken, so
+	 * every dereference of it has to happen before rcu_read_unlock().
+	 * NOTE(review): task->mm is still read without task_lock(); pinning
+	 * it properly needs get_task_mm()/mmput(), which may not be usable
+	 * from this timer callback - confirm.
+	 */
+	rcu_read_lock();
+	pid_s = find_vpid(pid);
+	if (pid_s)
+		task = pid_task(pid_s, PIDTYPE_PID);
+	if (!task)
+		goto out_unlock;
+
+	mm = task->mm;
+	if (!mm)
+		goto out_unlock;
+
+	vm_size = mm->total_vm - mm->reserved_vm;
+	rss_size = get_mm_rss(mm);
+	rcu_read_unlock();
+
+	if (vm_size != hrt_ctx->vm_size_prev ||
+	    rss_size != hrt_ctx->rss_size_prev) {
+		make_sample(hrt_ctx, pid, vm_size, rss_size);
+		hrt_ctx->vm_size_prev = vm_size;
+		hrt_ctx->rss_size_prev = rss_size;
+	}
+	return;
+
+out_unlock:
+	rcu_read_unlock();
+}
+
+/*
+ * Memory-analysis timer callback.
+ * @data: the struct quadd_hrt_ctx pointer stored in the timer.
+ *
+ * While profiling is active it takes one memory sample and re-arms itself
+ * ma_period milliseconds ahead; otherwise it returns without re-arming.
+ */
+static void timer_interrupt(unsigned long data)
+{
+	struct quadd_hrt_ctx *hrt_ctx = (struct quadd_hrt_ctx *)data;
+
+	if (!hrt_ctx->active)
+		return;
+
+	check_ma(hrt_ctx);
+
+	hrt_ctx->ma_timer.expires =
+		jiffies + msecs_to_jiffies(hrt_ctx->ma_period);
+	add_timer(&hrt_ctx->ma_timer);
+}
+
+/*
+ * Start periodic memory analysis for @hrt_ctx.
+ *
+ * Does nothing apart from logging when the configured period is 0.
+ * Otherwise it forgets the previously reported sizes and arms the timer to
+ * fire one period from now.
+ */
+void quadd_ma_start(struct quadd_hrt_ctx *hrt_ctx)
+{
+	struct timer_list *ma_timer = &hrt_ctx->ma_timer;
+	unsigned int interval_ms = hrt_ctx->ma_period;
+
+	if (!interval_ms) {
+		pr_info("QuadD MA is disabled\n");
+		return;
+	}
+
+	pr_info("QuadD MA is started, interval: %u msec\n",
+		interval_ms);
+
+	hrt_ctx->vm_size_prev = 0;
+	hrt_ctx->rss_size_prev = 0;
+
+	init_timer(ma_timer);
+	ma_timer->data = (unsigned long)hrt_ctx;
+	ma_timer->function = timer_interrupt;
+	ma_timer->expires = jiffies + msecs_to_jiffies(interval_ms);
+	add_timer(ma_timer);
+}
+
+/*
+ * Stop periodic memory analysis.  The timer only exists when the period is
+ * non-zero, so nothing is done otherwise.
+ */
+void quadd_ma_stop(struct quadd_hrt_ctx *hrt_ctx)
+{
+	if (hrt_ctx->ma_period == 0)
+		return;
+
+	pr_info("QuadD MA is stopped\n");
+	del_timer_sync(&hrt_ctx->ma_timer);
+}
diff --git a/drivers/misc/tegra-profiler/ma.h b/drivers/misc/tegra-profiler/ma.h
new file mode 100644
index 000000000000..be892b650927
--- /dev/null
+++ b/drivers/misc/tegra-profiler/ma.h
@@ -0,0 +1,25 @@
+/*
+ * drivers/misc/tegra-profiler/ma.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_MA_H
+#define __QUADD_MA_H
+
+struct quadd_hrt_ctx;
+
+void quadd_ma_start(struct quadd_hrt_ctx *hrt_ctx);
+void quadd_ma_stop(struct quadd_hrt_ctx *hrt_ctx);
+
+#endif	/* __QUADD_MA_H */
diff --git a/drivers/misc/tegra-profiler/main.c b/drivers/misc/tegra-profiler/main.c
new file mode 100644
index 000000000000..71b9554e92d3
--- /dev/null
+++ b/drivers/misc/tegra-profiler/main.c
@@ -0,0 +1,471 @@
+/*
+ * drivers/misc/tegra-profiler/main.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "quadd.h"
+#include "armv7_pmu.h"
+#include "hrt.h"
+#include "pl310.h"
+#include "comm.h"
+#include "mmap.h"
+#include "debug.h"
+#include "tegra.h"
+#include "power_clk.h"
+#include "auth.h"
+#include "version.h"
+
+static struct quadd_ctx ctx;
+
+/*
+ * Load the built-in default profiling parameters into the module context.
+ * Always returns 0.
+ */
+static int get_default_properties(void)
+{
+	ctx.param.freq = 100;
+	ctx.param.ma_freq = 50;
+	ctx.param.backtrace = 1;
+	ctx.param.use_freq = 1;
+	ctx.param.system_wide = 1;
+	ctx.param.power_rate_freq = 0;
+	ctx.param.debug_samples = 0;
+
+	/* a single placeholder pid until real parameters are supplied */
+	ctx.param.pids[0] = 0;
+	ctx.param.nr_pids = 1;
+
+	return 0;
+}
+
+/*
+ * Control-interface entry point: start profiling.
+ *
+ * Returns 0 on success, or immediately if profiling is already running.
+ * On failure a negative error code is returned, everything enabled so far
+ * is rolled back and the "started" flag is cleared, so that a later start()
+ * can try again and stop() does not tear down a half-started session.
+ */
+static int start(void)
+{
+	int err;
+
+	/* only the caller that flips started 0 -> 1 does the work */
+	if (atomic_cmpxchg(&ctx.started, 0, 1))
+		return 0;
+
+	if (ctx.pmu) {
+		err = ctx.pmu->enable();
+		if (err) {
+			pr_err("error: pmu enable\n");
+			goto err_out;
+		}
+	}
+
+	if (ctx.pl310) {
+		err = ctx.pl310->enable();
+		if (err) {
+			pr_err("error: pl310 enable\n");
+			goto err_pmu;
+		}
+	}
+
+	quadd_mmap_reset();
+	ctx.comm->reset();
+
+	err = quadd_power_clk_start();
+	if (err < 0) {
+		pr_err("error: power_clk start\n");
+		goto err_pl310;
+	}
+
+	err = quadd_hrt_start();
+	if (err) {
+		pr_err("error: hrt start\n");
+		goto err_power_clk;
+	}
+
+	return 0;
+
+err_power_clk:
+	quadd_power_clk_stop();
+err_pl310:
+	if (ctx.pl310)
+		ctx.pl310->disable();
+err_pmu:
+	if (ctx.pmu)
+		ctx.pmu->disable();
+err_out:
+	atomic_set(&ctx.started, 0);
+	return err;
+}
+
+/*
+ * Control-interface entry point: stop profiling.  Does nothing unless a
+ * session is running.  Otherwise it stops sampling, resets the mmap and
+ * comm state, stops power/clock reporting and disables the event sources.
+ */
+static void stop(void)
+{
+	/* only the caller that flips started 1 -> 0 does the teardown */
+	if (!atomic_cmpxchg(&ctx.started, 1, 0))
+		return;
+
+	quadd_hrt_stop();
+
+	quadd_mmap_reset();
+	ctx.comm->reset();
+
+	quadd_power_clk_stop();
+
+	if (ctx.pmu)
+		ctx.pmu->disable();
+
+	if (ctx.pl310)
+		ctx.pl310->disable();
+}
+
+/* Return 1 if @event is listed among the events @si supports, else 0. */
+static inline int is_event_supported(struct source_info *si, int event)
+{
+	int *supported = si->supported_events;
+	int count = si->nr_supported_events;
+	int idx = 0;
+
+	while (idx < count) {
+		if (supported[idx] == event)
+			return 1;
+		idx++;
+	}
+
+	return 0;
+}
+
+/*
+ * Control-interface entry point: validate and apply new parameters.
+ * @param:         requested parameters (supplied by the comm layer).
+ * @debug_app_uid: set to the uid reported by the auth service when access
+ *                 is granted through the "debuggable application" check;
+ *                 left untouched otherwise.
+ *
+ * Only sampling frequencies of 100, 1000 and 10000 and exactly one target
+ * pid are accepted.  Unless the caller has CAP_SYS_ADMIN or owns the target
+ * process, the auth service must confirm that the package is debuggable.
+ * Each requested event is routed to the PMU or to the PL310 source; at most
+ * QUADD_MAX_COUNTERS PMU events and one PL310 event are allowed.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int set_parameters(struct quadd_parameters *param, uid_t *debug_app_uid)
+{
+	int i, err;
+	int pmu_events_id[QUADD_MAX_COUNTERS];
+	int pl310_events_id;
+	int nr_pmu = 0, nr_pl310 = 0;
+	int uid = 0;
+	uid_t task_owner;
+	struct task_struct *task;
+
+	/* validate the *requested* frequency, not the one already stored */
+	if (param->freq != 100 && param->freq != 1000 &&
+	    param->freq != 10000)
+		return -EINVAL;
+
+	/* Currently only one process */
+	if (param->nr_pids != 1)
+		return -EINVAL;
+
+	ctx.param.freq = param->freq;
+	ctx.param.ma_freq = param->ma_freq;
+	ctx.param.backtrace = param->backtrace;
+	ctx.param.use_freq = param->use_freq;
+	ctx.param.system_wide = param->system_wide;
+	ctx.param.power_rate_freq = param->power_rate_freq;
+	ctx.param.debug_samples = param->debug_samples;
+
+	/*
+	 * No reference is taken on the task, so it may only be dereferenced
+	 * inside the RCU read-side section; fetch the owner uid there.
+	 */
+	rcu_read_lock();
+	task = pid_task(find_vpid(param->pids[0]), PIDTYPE_PID);
+	if (!task) {
+		rcu_read_unlock();
+		pr_err("Process not found: %u\n", param->pids[0]);
+		return -ESRCH;
+	}
+	task_owner = task_uid(task);
+	rcu_read_unlock();
+
+	pr_info("owner/task uids: %u/%u\n", current_fsuid(), task_owner);
+	if (!capable(CAP_SYS_ADMIN)) {
+		if (current_fsuid() != task_owner) {
+			uid = quadd_auth_check_debug_flag(param->package_name);
+			if (uid < 0) {
+				pr_err("Error: QuadD security service\n");
+				return uid;
+			} else if (uid == 0) {
+				pr_err("Error: app is not debuggable\n");
+				return -EACCES;
+			}
+
+			*debug_app_uid = uid;
+			pr_info("debug_app_uid: %u\n", uid);
+		}
+	}
+
+	for (i = 0; i < param->nr_pids; i++)
+		ctx.param.pids[i] = param->pids[i];
+
+	ctx.param.nr_pids = param->nr_pids;
+
+	for (i = 0; i < param->nr_events; i++) {
+		int event = param->events[i];
+
+		if (ctx.pmu && ctx.pmu_info.nr_supported_events > 0
+			&& is_event_supported(&ctx.pmu_info, event)) {
+			/* pmu_events_id[] is on the stack: bound the writes */
+			if (nr_pmu >= QUADD_MAX_COUNTERS) {
+				pr_err("error: too many pmu events\n");
+				return -EINVAL;
+			}
+			pmu_events_id[nr_pmu++] = event;
+
+			pr_info("PMU active event: %s\n",
+				quadd_get_event_str(event));
+		} else if (ctx.pl310 &&
+			   ctx.pl310_info.nr_supported_events > 0 &&
+			   is_event_supported(&ctx.pl310_info, event)) {
+			pl310_events_id = event;
+
+			pr_info("PL310 active event: %s\n",
+				quadd_get_event_str(event));
+
+			/*
+			 * Pre-increment: the second PL310 event must already
+			 * be rejected.  With a post-increment it slipped
+			 * through and silently left PL310 disabled.
+			 */
+			if (++nr_pl310 > 1) {
+				pr_err("error: multiply pl310 events\n");
+				return -EINVAL;
+			}
+		} else {
+			pr_err("Bad event: %s\n",
+			       quadd_get_event_str(event));
+			return -EINVAL;
+		}
+	}
+
+	if (ctx.pmu) {
+		if (nr_pmu > 0) {
+			err = ctx.pmu->set_events(pmu_events_id, nr_pmu);
+			if (err) {
+				pr_err("PMU set parameters: error\n");
+				return err;
+			}
+			ctx.pmu_info.active = 1;
+		} else {
+			ctx.pmu_info.active = 0;
+			ctx.pmu->set_events(NULL, 0);
+		}
+	}
+
+	if (ctx.pl310) {
+		if (nr_pl310 == 1) {
+			err = ctx.pl310->set_events(&pl310_events_id, 1);
+			if (err) {
+				pr_info("pl310 set_parameters: error\n");
+				return err;
+			}
+			ctx.pl310_info.active = 1;
+		} else {
+			ctx.pl310_info.active = 0;
+			ctx.pl310->set_events(NULL, 0);
+		}
+	}
+	pr_info("New parameters have been applied\n");
+
+	return 0;
+}
+
+/*
+ * Control-interface entry point: describe what this system can profile.
+ * @cap: output structure; every field set by this function is written on
+ *       every path.
+ *
+ * L2 cache events are reported as available through the PL310 source
+ * (single event at a time) or, failing that, through the PMU (multiple
+ * events) when it lists any L2 event.  The per-event flags reflect the
+ * events the PL310 and PMU sources report as supported.
+ */
+static void get_capabilities(struct quadd_comm_cap *cap)
+{
+	int i;
+	struct quadd_events_cap *events_cap = &cap->events_cap;
+
+	cap->pmu = ctx.pmu ? 1 : 0;
+
+	/* defaults; previously l2_multiple_events stayed unset without L2 */
+	cap->l2_cache = 0;
+	cap->l2_multiple_events = 0;
+	if (ctx.pl310) {
+		cap->l2_cache = 1;
+		cap->l2_multiple_events = 0;
+	} else if (ctx.pmu) {
+		struct source_info *s = &ctx.pmu_info;
+		for (i = 0; i < s->nr_supported_events; i++) {
+			int event = s->supported_events[i];
+
+			if (event == QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES ||
+			    event == QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES ||
+			    event == QUADD_EVENT_TYPE_L2_ICACHE_MISSES) {
+				cap->l2_cache = 1;
+				cap->l2_multiple_events = 1;
+				break;
+			}
+		}
+	}
+
+	events_cap->cpu_cycles = 0;
+	events_cap->l1_dcache_read_misses = 0;
+	events_cap->l1_dcache_write_misses = 0;
+	events_cap->l1_icache_misses = 0;
+
+	events_cap->instructions = 0;
+	events_cap->branch_instructions = 0;
+	events_cap->branch_misses = 0;
+	events_cap->bus_cycles = 0;
+
+	events_cap->l2_dcache_read_misses = 0;
+	events_cap->l2_dcache_write_misses = 0;
+	events_cap->l2_icache_misses = 0;
+
+	if (ctx.pl310) {
+		struct source_info *s = &ctx.pl310_info;
+		for (i = 0; i < s->nr_supported_events; i++) {
+			int event = s->supported_events[i];
+
+			switch (event) {
+			case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES:
+				events_cap->l2_dcache_read_misses = 1;
+				break;
+			case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES:
+				events_cap->l2_dcache_write_misses = 1;
+				break;
+			case QUADD_EVENT_TYPE_L2_ICACHE_MISSES:
+				events_cap->l2_icache_misses = 1;
+				break;
+
+			default:
+				/* the PL310 source may only list L2 events */
+				BUG();
+				break;
+			}
+		}
+	}
+
+	if (ctx.pmu) {
+		struct source_info *s = &ctx.pmu_info;
+		for (i = 0; i < s->nr_supported_events; i++) {
+			int event = s->supported_events[i];
+
+			switch (event) {
+			case QUADD_EVENT_TYPE_CPU_CYCLES:
+				events_cap->cpu_cycles = 1;
+				break;
+			case QUADD_EVENT_TYPE_INSTRUCTIONS:
+				events_cap->instructions = 1;
+				break;
+			case QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS:
+				events_cap->branch_instructions = 1;
+				break;
+			case QUADD_EVENT_TYPE_BRANCH_MISSES:
+				events_cap->branch_misses = 1;
+				break;
+			case QUADD_EVENT_TYPE_BUS_CYCLES:
+				events_cap->bus_cycles = 1;
+				break;
+
+			case QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES:
+				events_cap->l1_dcache_read_misses = 1;
+				break;
+			case QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES:
+				events_cap->l1_dcache_write_misses = 1;
+				break;
+			case QUADD_EVENT_TYPE_L1_ICACHE_MISSES:
+				events_cap->l1_icache_misses = 1;
+				break;
+
+			case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES:
+				events_cap->l2_dcache_read_misses = 1;
+				break;
+			case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES:
+				events_cap->l2_dcache_write_misses = 1;
+				break;
+			case QUADD_EVENT_TYPE_L2_ICACHE_MISSES:
+				events_cap->l2_icache_misses = 1;
+				break;
+
+			default:
+				/* an id unknown to this table is a driver bug */
+				BUG();
+				break;
+			}
+		}
+	}
+
+	cap->tegra_lp_cluster = quadd_is_cpu_with_lp_cluster();
+	cap->power_rate = 1;
+	cap->blocked_read = 0;
+}
+
+/* Control-interface entry point: report the sampling core's statistics. */
+static void get_state(struct quadd_module_state *state)
+{
+	quadd_hrt_get_state(state);
+}
+
+/* Callback table handed to the comm layer by quadd_module_init(). */
+static struct quadd_comm_control_interface control = {
+	.start			= start,
+	.stop			= stop,
+	.set_parameters		= set_parameters,
+	.get_capabilities	= get_capabilities,
+	.get_state		= get_state,
+};
+
+/*
+ * Module entry point: discover the event sources (PMU is mandatory, PL310
+ * optional) and bring up the sampling core, mmap tracking, power/clock
+ * reporting, the comm interface and the auth service, in that order.
+ *
+ * Returns 0 on success.  On failure a negative error code is returned and
+ * the subsystems initialized so far are torn down in reverse order.
+ * NOTE(review): no de-initialization entry points for the PMU and PL310
+ * sources are visible here, so those are not unwound - confirm.
+ */
+static int __init quadd_module_init(void)
+{
+	int i, nr_events, err;
+	int *events;
+
+	pr_info("Branch: %s\n", QUADD_MODULE_BRANCH);
+	pr_info("Version: %s\n", QUADD_MODULE_VERSION);
+	pr_info("Samples version: %d\n", QUADD_SAMPLES_VERSION);
+	pr_info("IO version: %d\n", QUADD_IO_VERSION);
+
+#ifdef QM_DEBUG_SAMPLES_ENABLE
+	pr_info("############## DEBUG VERSION! ##############\n");
+#endif
+	atomic_set(&ctx.started, 0);
+
+	get_default_properties();
+
+	ctx.pmu_info.active = 0;
+	ctx.pl310_info.active = 0;
+
+	ctx.pmu = quadd_armv7_pmu_init();
+	if (!ctx.pmu) {
+		pr_err("PMU init failed\n");
+		return -ENODEV;
+	}
+
+	events = ctx.pmu_info.supported_events;
+	nr_events = ctx.pmu->get_supported_events(events);
+	ctx.pmu_info.nr_supported_events = nr_events;
+
+	pr_info("PMU: amount of events: %d\n", nr_events);
+
+	for (i = 0; i < nr_events; i++)
+		pr_info("PMU event: %s\n",
+			quadd_get_event_str(events[i]));
+
+	ctx.pl310 = quadd_l2x0_events_init();
+	if (ctx.pl310) {
+		events = ctx.pl310_info.supported_events;
+		nr_events = ctx.pl310->get_supported_events(events);
+		ctx.pl310_info.nr_supported_events = nr_events;
+
+		pr_info("pl310 success, amount of events: %d\n",
+			nr_events);
+
+		for (i = 0; i < nr_events; i++)
+			pr_info("pl310 event: %s\n",
+				quadd_get_event_str(events[i]));
+	} else {
+		pr_info("PL310 not found\n");
+	}
+
+	ctx.hrt = quadd_hrt_init(&ctx);
+	if (!ctx.hrt) {
+		pr_err("error: HRT init failed\n");
+		return -ENODEV;
+	}
+
+	ctx.mmap = quadd_mmap_init(&ctx);
+	if (!ctx.mmap) {
+		pr_err("error: MMAP init failed\n");
+		err = -ENODEV;
+		goto err_hrt;
+	}
+
+	err = quadd_power_clk_init(&ctx);
+	if (err < 0) {
+		pr_err("error: POWER CLK init failed\n");
+		goto err_mmap;
+	}
+
+	ctx.comm = quadd_comm_events_init(&control);
+	if (!ctx.comm) {
+		pr_err("error: COMM init failed\n");
+		err = -ENODEV;
+		goto err_power_clk;
+	}
+
+	err = quadd_auth_init(&ctx);
+	if (err < 0) {
+		pr_err("error: auth failed\n");
+		goto err_comm;
+	}
+
+	return 0;
+
+	/* unwind in reverse order of initialization */
+err_comm:
+	quadd_comm_events_exit();
+err_power_clk:
+	quadd_power_clk_deinit();
+err_mmap:
+	quadd_mmap_deinit();
+err_hrt:
+	quadd_hrt_deinit();
+	return err;
+}
+
+/*
+ * Module exit point: tear down the sampling core, mmap tracking,
+ * power/clock reporting, the comm interface and the auth service.
+ *
+ * NOTE(review): the order is not the reverse of quadd_module_init(); the
+ * comm interface is still registered while the sampling core is already
+ * being freed - confirm that no control request can arrive in between.
+ */
+static void __exit quadd_module_exit(void)
+{
+	pr_info("QuadD module exit\n");
+
+	quadd_hrt_deinit();
+	quadd_mmap_deinit();
+	quadd_power_clk_deinit();
+	quadd_comm_events_exit();
+	quadd_auth_deinit();
+}
+
+module_init(quadd_module_init);
+module_exit(quadd_module_exit);
+
+MODULE_LICENSE("GPL");
+
+MODULE_AUTHOR("Nvidia Ltd");
+MODULE_DESCRIPTION("Tegra profiler");
diff --git a/drivers/misc/tegra-profiler/mmap.c b/drivers/misc/tegra-profiler/mmap.c
new file mode 100644
index 000000000000..a52b11f74cd2
--- /dev/null
+++ b/drivers/misc/tegra-profiler/mmap.c
@@ -0,0 +1,236 @@
+/*
+ * drivers/misc/tegra-profiler/mmap.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/crc32.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "mmap.h"
+#include "hrt.h"
+#include "debug.h"
+
+/* Single file-wide mmap context; filled in by quadd_mmap_init(). */
+static struct quadd_mmap_ctx mmap_ctx;
+
+/*
+ * Insert @key into the ascending-sorted @array holding @length elements,
+ * unless the key is already stored there.
+ *
+ * The caller must make sure the array has room for one more element.
+ *
+ * Returns 1 when the key was inserted, 0 when it was already present.
+ */
+static int binary_search_and_add(unsigned int *array,
+			unsigned int length, unsigned int key)
+{
+	unsigned int lo, hi;
+
+	if (!length) {
+		array[0] = key;
+		return 1;
+	}
+
+	if (length == 1 && array[0] == key)
+		return 0;
+
+	/* smaller than every element: shift everything up and prepend */
+	if (key < array[0]) {
+		memmove(&array[1], &array[0], length * sizeof(*array));
+		array[0] = key;
+		return 1;
+	}
+
+	/* larger than every element: append */
+	if (key > array[length - 1]) {
+		array[length] = key;
+		return 1;
+	}
+
+	/* find the first slot whose element is not smaller than the key */
+	lo = 0;
+	hi = length;
+	while (lo < hi) {
+		unsigned int mid = lo + (hi - lo) / 2;
+
+		if (array[mid] < key)
+			lo = mid + 1;
+		else
+			hi = mid;
+	}
+
+	if (array[hi] == key)
+		return 0;
+
+	/* open a gap at the insertion point */
+	memmove(&array[hi + 1], &array[hi],
+		(length - hi) * sizeof(*array));
+	array[hi] = key;
+	return 1;
+}
+
+/*
+ * Try to record @key in the sorted hash table of mmap_ctx.
+ *
+ * Returns 0 when the key was newly added, 1 when it was already stored or
+ * the table has no free slot left.
+ */
+static int check_hash(u32 key)
+{
+	unsigned long irq_flags;
+	int ret = 1;
+
+	spin_lock_irqsave(&mmap_ctx.lock, irq_flags);
+
+	if (mmap_ctx.nr_hashes < QUADD_MMAP_SIZE_ARRAY &&
+	    binary_search_and_add(mmap_ctx.hash_array,
+				  mmap_ctx.nr_hashes, key) > 0) {
+		mmap_ctx.nr_hashes++;
+		ret = 0;
+	}
+
+	spin_unlock_irqrestore(&mmap_ctx.lock, irq_flags);
+	return ret;
+}
+
+/*
+ * Describe the mapping that contains the sampled instruction pointer.
+ *
+ * For a user-mode sample the VMA covering the IP is looked up in current->mm
+ * and the path of its backing file is resolved; for a kernel-mode sample the
+ * module owning the address, if any, is used.  When a name is found, @sample
+ * receives the start address, length and file offset of the mapping.
+ *
+ * A CRC32 over (name, addr, len) is stored in the hash table, so the name is
+ * handed out only when check_hash() reports the triple as newly added.
+ *
+ * Returns cpu_ctx->mmap_filename holding the name, with *extra_length set to
+ * strlen(name) + 1 rounded up to a multiple of 8.  Returns NULL with
+ * *extra_length = 0 when there is no mm, no name could be resolved, or the
+ * triple was already reported (or the hash table is full).
+ *
+ * NOTE(review): mmap_ctx.tmp_buf is a single shared buffer used here without
+ * taking mmap_ctx.lock - confirm callers cannot run this concurrently.
+ */
+char *quadd_get_mmap(struct quadd_cpu_context *cpu_ctx,
+		     struct pt_regs *regs, struct quadd_mmap_data *sample,
+		     unsigned int *extra_length)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct file *vm_file;
+	struct path *path;
+	char *file_name = NULL;
+	int length, length_aligned;
+	u32 crc;
+	unsigned long ip;
+
+	if (!mm) {
+		*extra_length = 0;
+		return NULL;
+	}
+
+	ip = instruction_pointer(regs);
+
+	if (user_mode(regs)) {
+		for (vma = find_vma(mm, ip); vma; vma = vma->vm_next) {
+			if (ip < vma->vm_start || ip >= vma->vm_end)
+				continue;
+
+			vm_file = vma->vm_file;
+			if (!vm_file)
+				break;
+
+			path = &vm_file->f_path;
+
+			file_name = d_path(path, mmap_ctx.tmp_buf, PATH_MAX);
+			if (IS_ERR_OR_NULL(file_name)) {
+				/*
+				 * d_path() reports failure through an
+				 * ERR_PTR value; never treat it as a string.
+				 */
+				file_name = NULL;
+				break;
+			}
+
+			sample->addr = vma->vm_start;
+			sample->len = vma->vm_end - vma->vm_start;
+			sample->pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT;
+			break;
+		}
+	} else {
+		struct module *mod;
+
+		preempt_disable();
+		mod = __module_address(ip);
+		preempt_enable();
+
+		if (mod) {
+			file_name = mod->name;
+			if (file_name) {
+				sample->addr = (u32) mod->module_core;
+				sample->len = mod->core_size;
+				sample->pgoff = 0;
+			}
+		}
+	}
+
+	if (file_name) {
+		length = strlen(file_name);
+		if (length >= PATH_MAX) {
+			*extra_length = 0;
+			return NULL;
+		}
+
+		crc = crc32_le(~0, file_name, length);
+		crc = crc32_le(crc, (unsigned char *)&sample->addr,
+			       sizeof(sample->addr));
+		crc = crc32_le(crc, (unsigned char *)&sample->len,
+			       sizeof(sample->len));
+
+		/* only hand the name out the first time this triple is seen */
+		if (!check_hash(crc)) {
+			strcpy(cpu_ctx->mmap_filename, file_name);
+			/* string plus terminator, padded to 8 bytes */
+			length_aligned = (length + 1 + 7) & (~7);
+			*extra_length = length_aligned;
+
+			return cpu_ctx->mmap_filename;
+		}
+	}
+
+	*extra_length = 0;
+	return NULL;
+}
+
+/*
+ * Set up the file-wide mmap context.
+ *
+ * Allocates the hash table (QUADD_MMAP_SIZE_ARRAY entries) and the scratch
+ * buffer used for path resolution, and initialises the lock and the entry
+ * count.  The buffers are only published in mmap_ctx once both allocations
+ * have succeeded, so a failure leaves nothing allocated.
+ *
+ * Returns a pointer to the context, or NULL when an allocation fails.
+ */
+struct quadd_mmap_ctx *quadd_mmap_init(struct quadd_ctx *quadd_ctx)
+{
+	u32 *hash;
+	char *tmp;
+
+	mmap_ctx.quadd_ctx = quadd_ctx;
+
+	mmap_ctx.nr_hashes = 0;
+	spin_lock_init(&mmap_ctx.lock);
+
+	hash = kzalloc(QUADD_MMAP_SIZE_ARRAY * sizeof(unsigned int),
+		       GFP_KERNEL);
+	if (!hash) {
+		pr_err("Alloc error\n");
+		return NULL;
+	}
+
+	tmp = kzalloc(PATH_MAX + sizeof(unsigned long long),
+		      GFP_KERNEL);
+	if (!tmp) {
+		pr_err("Alloc error\n");
+		/* do not leak the hash table on the second failure */
+		kfree(hash);
+		return NULL;
+	}
+
+	mmap_ctx.hash_array = hash;
+	mmap_ctx.tmp_buf = tmp;
+
+	return &mmap_ctx;
+}
+
+/* Forget every recorded hash by resetting the entry count under the lock. */
+void quadd_mmap_reset(void)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&mmap_ctx.lock, irq_flags);
+
+	mmap_ctx.nr_hashes = 0;
+
+	spin_unlock_irqrestore(&mmap_ctx.lock, irq_flags);
+}
+
+/*
+ * Release the hash table and the scratch buffer and clear the pointers,
+ * all while holding the context lock.
+ */
+void quadd_mmap_deinit(void)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&mmap_ctx.lock, irq_flags);
+
+	/* hash table */
+	kfree(mmap_ctx.hash_array);
+	mmap_ctx.hash_array = NULL;
+
+	/* path scratch buffer */
+	kfree(mmap_ctx.tmp_buf);
+	mmap_ctx.tmp_buf = NULL;
+
+	spin_unlock_irqrestore(&mmap_ctx.lock, irq_flags);
+}
diff --git a/drivers/misc/tegra-profiler/mmap.h b/drivers/misc/tegra-profiler/mmap.h
new file mode 100644
index 000000000000..f12ec4d61ed5
--- /dev/null
+++ b/drivers/misc/tegra-profiler/mmap.h
@@ -0,0 +1,47 @@
+/*
+ * drivers/misc/tegra-profiler/mmap.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_MMAP_H
+#define __QUADD_MMAP_H
+
+#include <linux/types.h>
+
+struct quadd_cpu_context;
+struct quadd_ctx;
+struct quadd_mmap_data;
+
+#define QUADD_MMAP_SIZE_ARRAY	4096
+
+/* State of the mmap tracker (one instance lives in mmap.c). */
+struct quadd_mmap_ctx {
+	/* sorted array of hashes, QUADD_MMAP_SIZE_ARRAY entries allocated */
+	u32 *hash_array;
+	/* number of entries of hash_array currently in use */
+	unsigned int nr_hashes;
+	/* taken around accesses to hash_array and nr_hashes */
+	spinlock_t lock;
+
+	/* scratch buffer handed to d_path() when resolving file names */
+	char *tmp_buf;
+
+	/* profiler context passed to quadd_mmap_init() */
+	struct quadd_ctx *quadd_ctx;
+};
+
+char *quadd_get_mmap(struct quadd_cpu_context *cpu_ctx,
+		     struct pt_regs *regs, struct quadd_mmap_data *sample,
+		     unsigned int *extra_length);
+
+
+struct quadd_mmap_ctx *quadd_mmap_init(struct quadd_ctx *quadd_ctx);
+void quadd_mmap_deinit(void);
+void quadd_mmap_reset(void);
+
+#endif  /* __QUADD_MMAP_H */
diff --git a/drivers/misc/tegra-profiler/pl310.c b/drivers/misc/tegra-profiler/pl310.c
new file mode 100644
index 000000000000..010830823817
--- /dev/null
+++ b/drivers/misc/tegra-profiler/pl310.c
@@ -0,0 +1,317 @@
+/*
+ * drivers/misc/tegra-profiler/pl310.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <asm/hardware/cache-l2x0.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "quadd.h"
+#include "pl310.h"
+#include "debug.h"
+
+/* Per-CPU copy of the value returned by the previous counter read. */
+DEFINE_PER_CPU(u32, pl310_prev_val);
+
+/* File-wide PL310 state; initialised by quadd_l2x0_events_init(). */
+static struct l2x0_context l2x0_ctx;
+
+/*
+ * Program @event0 / @event1 into the configuration registers of counter 0
+ * and counter 1, then write the enable bit to the control register.
+ */
+static void l2x0_enable_event_counters(u32 event0, u32 event1)
+{
+	void __iomem *regs = l2x0_ctx.l2x0_base;
+
+	/* configure counter0 and counter1 */
+	writel_relaxed(event0, regs + L2X0_EVENT_CNT0_CFG);
+	writel_relaxed(event1, regs + L2X0_EVENT_CNT1_CFG);
+
+	/* enable event counting */
+	writel_relaxed(L2X0_EVENT_CNT_ENABLE, regs + L2X0_EVENT_CNT_CTRL);
+}
+
+/* Disable event counting by writing zero to the control register. */
+static void __maybe_unused l2x0_disable_event_counters(void)
+{
+	void __iomem *regs = l2x0_ctx.l2x0_base;
+
+	writel_relaxed(0, regs + L2X0_EVENT_CNT_CTRL);
+}
+
+/*
+ * Stop counting: clear the control register first, then clear the
+ * configuration of both counters.
+ */
+static void l2x0_stop_event_counters(void)
+{
+	void __iomem *regs = l2x0_ctx.l2x0_base;
+
+	/* control register */
+	writel_relaxed(0, regs + L2X0_EVENT_CNT_CTRL);
+
+	/* per-counter configuration */
+	writel_relaxed(0, regs + L2X0_EVENT_CNT0_CFG);
+	writel_relaxed(0, regs + L2X0_EVENT_CNT1_CFG);
+}
+
+/*
+ * Set the reset bits of both counters in the control register, keeping the
+ * other bits as read.
+ */
+static void l2x0_reset_event_counters(void)
+{
+	void __iomem *ctrl = l2x0_ctx.l2x0_base + L2X0_EVENT_CNT_CTRL;
+	u32 val;
+
+	val = readl_relaxed(ctrl);
+	val |= L2X0_EVENT_CNT_RESET_CNT0 | L2X0_EVENT_CNT_RESET_CNT1;
+	writel_relaxed(val, ctrl);
+}
+
+/*
+ * Read the value register of the selected counter.
+ * Returns 0 for a counter id that is neither COUNTER0 nor COUNTER1.
+ */
+static u32 l2x0_read_event_counter(enum quadd_l2x0_counter counter)
+{
+	void __iomem *regs = l2x0_ctx.l2x0_base;
+
+	switch (counter) {
+	case QUADD_L2X0_COUNTER0:
+		return readl_relaxed(regs + L2X0_EVENT_CNT0_VAL);
+	case QUADD_L2X0_COUNTER1:
+		return readl_relaxed(regs + L2X0_EVENT_CNT1_VAL);
+	}
+
+	return 0;
+}
+
+/*
+ * Reset both counters, then start counting the request/hit event pair that
+ * belongs to @type: counter 0 counts requests, counter 1 counts hits.
+ * For an unknown @type the counters are reset but not enabled.
+ */
+static void l2x0_enable_perf_event(enum quadd_l2x0_event_type type)
+{
+	u32 req_event, hit_event;
+
+	l2x0_reset_event_counters();
+
+	switch (type) {
+	case QUADD_L2X0_TYPE_DATA_READ_MISSES:
+		req_event = L2X0_EVENT_CNT_CFG_DRREQ;
+		hit_event = L2X0_EVENT_CNT_CFG_DRHIT;
+		break;
+	case QUADD_L2X0_TYPE_DATA_WRITE_MISSES:
+		req_event = L2X0_EVENT_CNT_CFG_DWREQ;
+		hit_event = L2X0_EVENT_CNT_CFG_DWHIT;
+		break;
+	case QUADD_L2X0_TYPE_INSTRUCTION_MISSES:
+		req_event = L2X0_EVENT_CNT_CFG_IRREQ;
+		hit_event = L2X0_EVENT_CNT_CFG_IRHIT;
+		break;
+	default:
+		return;
+	}
+
+	l2x0_enable_event_counters(req_event, hit_event);
+}
+
+/*
+ * Return the number of misses, computed as requests (counter 0) minus hits
+ * (counter 1).  Returns 0 when fewer requests than hits were read.
+ */
+static u32 l2x0_read_perf_event(void)
+{
+	u32 requests = l2x0_read_event_counter(QUADD_L2X0_COUNTER0);
+	u32 hits = l2x0_read_event_counter(QUADD_L2X0_COUNTER1);
+
+	return (requests < hits) ? 0 : requests - hits;
+}
+
+/* Zero the saved previous counter value of every CPU id below nr_cpu_ids. */
+static void l2x0_clear_values(void)
+{
+	int cpu = 0;
+
+	while (cpu < nr_cpu_ids) {
+		per_cpu(pl310_prev_val, cpu) = 0;
+		cpu++;
+	}
+}
+
+/* .enable hook of the PL310 source: nothing to do, always returns 0. */
+static int l2x0_events_enable(void)
+{
+	return 0;
+}
+
+/* .disable hook of the PL310 source: intentionally empty. */
+static void l2x0_events_disable(void)
+{
+}
+
+/*
+ * .start hook: when an event type is configured, clear the saved per-CPU
+ * values and start the hardware counters for that type.
+ */
+static void l2x0_events_start(void)
+{
+	unsigned long irq_flags;
+
+	if (l2x0_ctx.l2x0_event_type >= 0) {
+		spin_lock_irqsave(&l2x0_ctx.lock, irq_flags);
+		l2x0_clear_values();
+		l2x0_enable_perf_event(l2x0_ctx.l2x0_event_type);
+		spin_unlock_irqrestore(&l2x0_ctx.lock, irq_flags);
+
+		qm_debug_start_source(QUADD_EVENT_SOURCE_PL310);
+	}
+}
+
+/*
+ * .stop hook: when an event type is configured, stop the hardware counters
+ * and clear the saved per-CPU values.
+ */
+static void l2x0_events_stop(void)
+{
+	unsigned long irq_flags;
+
+	if (l2x0_ctx.l2x0_event_type >= 0) {
+		spin_lock_irqsave(&l2x0_ctx.lock, irq_flags);
+		l2x0_stop_event_counters();
+		l2x0_clear_values();
+		spin_unlock_irqrestore(&l2x0_ctx.lock, irq_flags);
+
+		qm_debug_stop_source(QUADD_EVENT_SOURCE_PL310);
+	}
+}
+
+/*
+ * .read hook: store the current miss count and the value saved by the
+ * previous read on this CPU into events[0], then remember the new value.
+ *
+ * Returns 1 (one event written), or 0 when no event type is configured.
+ */
+static int __maybe_unused l2x0_events_read(struct event_data *events)
+{
+	struct event_data *ev = &events[0];
+	unsigned long irq_flags;
+
+	if (l2x0_ctx.l2x0_event_type < 0) {
+		/*
+		 * NOTE(review): ev->val is printed before this function has
+		 * written it - confirm the caller initialises it.
+		 */
+		pr_err_once("pl310 value: %u\n", ev->val);
+		return 0;
+	}
+
+	ev->event_source = QUADD_EVENT_SOURCE_PL310;
+	ev->event_id = l2x0_ctx.event_id;
+
+	spin_lock_irqsave(&l2x0_ctx.lock, irq_flags);
+	ev->val = l2x0_read_perf_event();
+	spin_unlock_irqrestore(&l2x0_ctx.lock, irq_flags);
+
+	/* hand back the old per-CPU value, then replace it */
+	ev->prev_val = __get_cpu_var(pl310_prev_val);
+	__get_cpu_var(pl310_prev_val) = ev->val;
+
+	qm_debug_read_counter(l2x0_ctx.event_id, ev->prev_val, ev->val);
+
+	return 1;
+}
+
+/*
+ * Emulated .read hook (selected when QUADD_USE_EMULATE_COUNTERS is defined):
+ * reports a synthetic value that grows by 10 per call and restarts from 0
+ * once it has exceeded 100.  Always returns 1.
+ */
+static int __maybe_unused l2x0_events_read_emulate(struct event_data *events)
+{
+	static u32 val;
+	struct event_data *ev = &events[0];
+
+	if (val > 100)
+		val = 0;
+
+	ev->event_source = QUADD_EVENT_SOURCE_PL310;
+	ev->event_id = QUADD_L2X0_TYPE_DATA_READ_MISSES;
+
+	ev->val = val;
+	ev->prev_val = __get_cpu_var(pl310_prev_val);
+	__get_cpu_var(pl310_prev_val) = val;
+
+	val += 10;
+
+	return 1;
+}
+
+/*
+ * .set_events hook: select the single event to count.
+ *
+ * A NULL @events or a @size of 0 clears the selection and returns 0.
+ * More than one event returns -ENOSPC.  An unsupported event id leaves the
+ * current selection untouched and returns 1.
+ *
+ * NOTE(review): the unsupported-event path returns a positive value while
+ * the other error path returns a negative errno - confirm what callers
+ * test for.
+ */
+static int l2x0_set_events(int *events, int size)
+{
+	int id, l2x0_type;
+
+	if (!events || size == 0) {
+		l2x0_ctx.l2x0_event_type = -1;
+		l2x0_ctx.event_id = -1;
+		return 0;
+	}
+
+	if (size != 1) {
+		pr_err("Error: number of events more than one\n");
+		return -ENOSPC;
+	}
+
+	id = *events;
+
+	switch (id) {
+	case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES:
+		l2x0_type = QUADD_L2X0_TYPE_DATA_READ_MISSES;
+		break;
+	case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES:
+		l2x0_type = QUADD_L2X0_TYPE_DATA_WRITE_MISSES;
+		break;
+	case QUADD_EVENT_TYPE_L2_ICACHE_MISSES:
+		l2x0_type = QUADD_L2X0_TYPE_INSTRUCTION_MISSES;
+		break;
+	default:
+		pr_err("Error event: %s\n", quadd_get_event_str(id));
+		return 1;
+	}
+
+	l2x0_ctx.l2x0_event_type = l2x0_type;
+	l2x0_ctx.event_id = id;
+
+	pr_info("Event has been added: id/l2x0: %s/%#x\n",
+		quadd_get_event_str(id), l2x0_ctx.l2x0_event_type);
+	return 0;
+}
+
+/*
+ * .get_supported_events hook: write the three supported L2 event ids to
+ * @events (which must have room for them) and return their count.
+ */
+static int get_supported_events(int *events)
+{
+	int nr = 0;
+
+	events[nr++] = QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES;
+	events[nr++] = QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES;
+	events[nr++] = QUADD_EVENT_TYPE_L2_ICACHE_MISSES;
+
+	return nr;
+}
+
+/*
+ * Event-source interface of the PL310, returned by quadd_l2x0_events_init().
+ * The .read hook is the emulated one when QUADD_USE_EMULATE_COUNTERS is
+ * defined, the hardware one otherwise.
+ */
+static struct quadd_event_source_interface l2x0_int = {
+	.enable			= l2x0_events_enable,
+	.disable		= l2x0_events_disable,
+
+	.start			= l2x0_events_start,
+	.stop			= l2x0_events_stop,
+
+#ifndef QUADD_USE_EMULATE_COUNTERS
+	.read			= l2x0_events_read,
+#else
+	.read			= l2x0_events_read_emulate,
+#endif
+	.set_events		= l2x0_set_events,
+	.get_supported_events	= get_supported_events,
+};
+
+/*
+ * Initialise the PL310 event source.
+ *
+ * Maps 4K of registers at the address given by quadd_get_pl310_phys_addr()
+ * and checks the cache ID register against the expected pattern.
+ *
+ * Returns the source interface, or NULL when no physical address is known,
+ * the mapping fails or the cache ID does not match.
+ */
+struct quadd_event_source_interface *quadd_l2x0_events_init(void)
+{
+	void __iomem *base;
+	unsigned long phys_addr;
+	u32 cache_id;
+
+	l2x0_ctx.l2x0_event_type = -1;
+	l2x0_ctx.event_id = -1;
+
+	l2x0_ctx.l2x0_base = NULL;
+
+	phys_addr = quadd_get_pl310_phys_addr();
+	if (!phys_addr)
+		return NULL;
+
+	base = ioremap(phys_addr, SZ_4K);
+	if (!base)
+		return NULL;
+
+	cache_id = readl(base + L2X0_CACHE_ID);
+	if ((cache_id & 0xff0003c0) != 0x410000c0) {
+		/* unexpected cache ID: drop the mapping again */
+		iounmap(base);
+		return NULL;
+	}
+
+	l2x0_ctx.l2x0_base = base;
+
+	l2x0_clear_values();
+	spin_lock_init(&l2x0_ctx.lock);
+
+	pr_debug("pl310 init success, l2x0_base: %p\n", base);
+	return &l2x0_int;
+}
diff --git a/drivers/misc/tegra-profiler/pl310.h b/drivers/misc/tegra-profiler/pl310.h
new file mode 100644
index 000000000000..96e60bfea9db
--- /dev/null
+++ b/drivers/misc/tegra-profiler/pl310.h
@@ -0,0 +1,79 @@
+/*
+ * drivers/misc/tegra-profiler/pl310.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_PL310_H
+#define __QUADD_PL310_H
+
+/*
+ * l2x0 event type
+ */
+enum quadd_l2x0_event_type {
+	QUADD_L2X0_TYPE_DATA_READ_MISSES	= 0,
+	QUADD_L2X0_TYPE_DATA_WRITE_MISSES	= 1,
+	QUADD_L2X0_TYPE_INSTRUCTION_MISSES	= 2,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/io.h>
+
+/* Values written to the event counter control register (pl310.c) */
+#define L2X0_EVENT_CNT_ENABLE		(1 << 0)
+#define L2X0_EVENT_CNT_RESET_CNT0	(1 << 1)
+#define L2X0_EVENT_CNT_RESET_CNT1	(2 << 1)
+
+
+/*
+ * Values written to the per-counter configuration registers (pl310.c):
+ * DR/DW/IR prefixes pair with the data read, data write and instruction
+ * event types; REQ goes to counter 0 and HIT to counter 1.
+ */
+#define L2X0_EVENT_CNT_CFG_DRHIT	(2 << 2)
+#define L2X0_EVENT_CNT_CFG_DRREQ	(3 << 2)
+
+#define L2X0_EVENT_CNT_CFG_DWHIT	(4 << 2)
+#define L2X0_EVENT_CNT_CFG_DWREQ	(5 << 2)
+
+#define L2X0_EVENT_CNT_CFG_IRHIT	(7 << 2)
+#define L2X0_EVENT_CNT_CFG_IRREQ	(8 << 2)
+
+/*
+ * l2x0 counters
+ */
+/*
+ * NOTE(review): the numeric values are swapped relative to the names
+ * (COUNTER1 is 0, COUNTER0 is 1).  pl310.c only refers to them by name;
+ * confirm nothing depends on the numbers.
+ */
+enum quadd_l2x0_counter {
+	QUADD_L2X0_COUNTER1 = 0,
+	QUADD_L2X0_COUNTER0 = 1,
+};
+
+/* State of the PL310 event source (one instance lives in pl310.c). */
+struct l2x0_context {
+	/* selected enum quadd_l2x0_event_type value, or -1 when none */
+	int l2x0_event_type;
+	/* QUADD_EVENT_TYPE_* id of the selected event, or -1 when none */
+	int event_id;
+
+	/* ioremap()ed register base, NULL until init succeeds */
+	void __iomem *l2x0_base;
+	/* taken around counter start, stop and read */
+	spinlock_t lock;
+};
+
+struct quadd_event_source_interface;
+
+struct quadd_event_source_interface *quadd_l2x0_events_init(void);
+
+/*
+ * Physical base address of the PL310 registers: 0x50043000 when
+ * CONFIG_ARCH_TEGRA is defined, 0 otherwise.
+ */
+static inline unsigned long quadd_get_pl310_phys_addr(void)
+{
+#if defined(CONFIG_ARCH_TEGRA)
+	return 0x50043000;
+#else
+	return 0;
+#endif
+}
+
+#endif  /* __KERNEL__ */
+
+#endif	/* __QUADD_PL310_H */
diff --git a/drivers/misc/tegra-profiler/power_clk.c b/drivers/misc/tegra-profiler/power_clk.c
new file mode 100644
index 000000000000..b5b73f4afcea
--- /dev/null
+++ b/drivers/misc/tegra-profiler/power_clk.c
@@ -0,0 +1,454 @@
+/*
+ * drivers/misc/tegra-profiler/power_clk.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/cpufreq.h>
+#include <linux/clk.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/timer.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "power_clk.h"
+#include "quadd.h"
+#include "hrt.h"
+#include "comm.h"
+#include "debug.h"
+
+#define POWER_CLK_MAX_VALUES	32
+
+typedef int (*notifier_call_ft)(struct notifier_block *, unsigned long, void *);
+
+/* One frequency reading together with the last value that was reported. */
+struct power_clk_data {
+	/* latest value stored by read_source() */
+	unsigned long value;
+	/* copy of value taken by update_data() */
+	unsigned long prev;
+};
+
+/* One monitored frequency source (cpu, gpu or emc). */
+struct power_clk_source {
+	/* QUADD_POWER_CLK_CPU, _GPU or _EMC */
+	int type;
+
+	/* clock handle obtained with clk_get_sys() (gpu and emc sources) */
+	struct clk *clkp;
+	/* notifier block whose callback refreshes this source */
+	struct notifier_block nb;
+
+	/* number of entries of data[] in use */
+	int nr;
+	struct power_clk_data data[POWER_CLK_MAX_VALUES];
+
+	/* incremented on every read_source() call */
+	unsigned long long counter;
+	/* non-zero while the source is started */
+	atomic_t active;
+
+	/* taken around accesses to data[] */
+	struct mutex lock;
+};
+
+/* File-wide state of the power/clock sampler. */
+struct power_clk_context_s {
+	struct power_clk_source cpu;
+	struct power_clk_source gpu;
+	struct power_clk_source emc;
+
+	/* polling timer, armed only when period is greater than 0 */
+	struct timer_list timer;
+	/* polling period in milliseconds; 0 disables the timer */
+	unsigned int period;
+
+	/* profiler context passed to quadd_power_clk_init() */
+	struct quadd_ctx *quadd_ctx;
+};
+
+/* Values of power_clk_source.type */
+enum {
+	QUADD_POWER_CLK_CPU = 1,
+	QUADD_POWER_CLK_GPU,
+	QUADD_POWER_CLK_EMC,
+};
+
+/* Single file-wide sampler context; set up by quadd_power_clk_init(). */
+static struct power_clk_context_s power_ctx;
+
+static void check_clks(void);
+
+/*
+ * Refresh the frequency value(s) of source @s.
+ *
+ * CPU: one cpufreq_get() reading per CPU id below nr_cpu_ids.
+ * GPU/EMC: the rate of the looked-up clock divided by 1000; the previous
+ * value is kept when the clock cannot be looked up.
+ *
+ * clk_get_sys() reports failure through an ERR_PTR value, so the handle is
+ * checked with IS_ERR_OR_NULL() before it is used.  The short-lived handle
+ * is kept in a local variable so that s->clkp is not replaced by a handle
+ * that has already been put.
+ *
+ * s->data is updated under s->lock; s->counter is incremented on each call.
+ */
+static void read_source(struct power_clk_source *s)
+{
+	int i;
+	struct clk *clkp;
+
+	mutex_lock(&s->lock);
+
+	switch (s->type) {
+	case QUADD_POWER_CLK_CPU:
+		/* update cpu frequency */
+		for (i = 0; i < nr_cpu_ids; i++)
+			s->data[i].value = cpufreq_get(i);
+		break;
+
+	case QUADD_POWER_CLK_GPU:
+		/* update gpu frequency */
+		clkp = clk_get_sys("3d", NULL);
+		if (!IS_ERR_OR_NULL(clkp)) {
+			s->data[0].value = clk_get_rate(clkp) / 1000;
+			clk_put(clkp);
+		}
+		break;
+
+	case QUADD_POWER_CLK_EMC:
+		/* update emc frequency */
+		clkp = clk_get_sys("cpu", "emc");
+		if (!IS_ERR_OR_NULL(clkp)) {
+			s->data[0].value = clk_get_rate(clkp) / 1000;
+			clk_put(clkp);
+		}
+		break;
+
+	default:
+		BUG();
+	}
+
+	mutex_unlock(&s->lock);
+	s->counter++;
+}
+
+/*
+ * Notifier callback of the gpu source: refresh the gpu reading and emit a
+ * sample if any source changed.  The notifier arguments are unused.
+ */
+static int
+gpu_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)
+{
+	read_source(&power_ctx.gpu);
+	check_clks();
+
+	return 0;
+}
+
+/*
+ * Notifier callback of the emc source: refresh the emc reading and emit a
+ * sample if any source changed.  The notifier arguments are unused.
+ */
+static int
+emc_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)
+{
+	read_source(&power_ctx.emc);
+	check_clks();
+
+	return 0;
+}
+
+/*
+ * Notifier callback of the cpu source: refresh the cpu readings and emit a
+ * sample if any source changed.  Without CONFIG_COMMON_CLK the gpu and emc
+ * readings are refreshed here as well.  The notifier arguments are unused.
+ */
+static int
+cpu_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)
+{
+	read_source(&power_ctx.cpu);
+
+#ifndef CONFIG_COMMON_CLK
+	read_source(&power_ctx.gpu);
+	read_source(&power_ctx.emc);
+#endif
+
+	check_clks();
+
+	return 0;
+}
+
+/*
+ * Build a QUADD_RECORD_TYPE_POWER_RATE record from the current readings and
+ * hand it to the comm layer.
+ *
+ * The per-CPU values travel as extra data after the record; the gpu and emc
+ * values are stored in the record itself.  A source that is not active
+ * contributes no CPU values, respectively a value of 0.
+ */
+static void make_sample(void)
+{
+	int i;
+	u32 extra_cpus[NR_CPUS];
+	struct power_clk_source *src;
+
+	struct quadd_record_data record;
+	struct quadd_power_rate_data *rate = &record.power_rate;
+	struct quadd_comm_data_interface *comm = power_ctx.quadd_ctx->comm;
+
+	record.magic = QUADD_RECORD_MAGIC;
+	record.record_type = QUADD_RECORD_TYPE_POWER_RATE;
+	record.cpu_mode = QUADD_CPU_MODE_NONE;
+
+	rate->time = quadd_get_time();
+
+	/* cpu frequencies */
+	src = &power_ctx.cpu;
+	mutex_lock(&src->lock);
+	if (atomic_read(&src->active)) {
+		rate->nr_cpus = src->nr;
+		for (i = 0; i < src->nr; i++)
+			extra_cpus[i] = src->data[i].value;
+	} else {
+		rate->nr_cpus = 0;
+	}
+	mutex_unlock(&src->lock);
+
+	/* gpu frequency */
+	src = &power_ctx.gpu;
+	mutex_lock(&src->lock);
+	rate->gpu = atomic_read(&src->active) ? src->data[0].value : 0;
+	mutex_unlock(&src->lock);
+
+	/* emc frequency */
+	src = &power_ctx.emc;
+	mutex_lock(&src->lock);
+	rate->emc = atomic_read(&src->active) ? src->data[0].value : 0;
+	mutex_unlock(&src->lock);
+
+	comm->put_sample(&record, (char *)extra_cpus,
+			 rate->nr_cpus * sizeof(extra_cpus[0]));
+}
+
+/*
+ * Returns 1 when at least one reading of @s differs from its previously
+ * reported value, 0 otherwise.
+ */
+static inline int is_data_changed(struct power_clk_source *s)
+{
+	int idx, changed = 0;
+
+	mutex_lock(&s->lock);
+	for (idx = 0; idx < s->nr; idx++) {
+		if (s->data[idx].value != s->data[idx].prev) {
+			changed = 1;
+			break;
+		}
+	}
+	mutex_unlock(&s->lock);
+
+	return changed;
+}
+
+/* Remember the current readings of @s as the previously reported ones. */
+static inline void update_data(struct power_clk_source *s)
+{
+	int idx;
+
+	mutex_lock(&s->lock);
+
+	for (idx = 0; idx < s->nr; idx++) {
+		struct power_clk_data *d = &s->data[idx];
+
+		d->prev = d->value;
+	}
+
+	mutex_unlock(&s->lock);
+}
+
+/*
+ * Compare the readings of the cpu, gpu and emc sources (in that order) with
+ * their previously reported values.  Every source that changed has its
+ * previous values refreshed, and one sample is emitted if any of them did.
+ */
+static void check_clks(void)
+{
+	struct power_clk_source *sources[] = {
+		&power_ctx.cpu,
+		&power_ctx.gpu,
+		&power_ctx.emc,
+	};
+	int changed = 0;
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		if (!is_data_changed(sources[i]))
+			continue;
+
+		update_data(sources[i]);
+		changed = 1;
+	}
+
+	if (changed)
+		make_sample();
+}
+
+/*
+ * Clear all readings of @s (current and previous values) and mark the
+ * source inactive, all under s->lock.
+ */
+static void reset_data(struct power_clk_source *s)
+{
+	int i;
+
+	mutex_lock(&s->lock);
+	for (i = 0; i < s->nr; i++) {
+		s->data[i].value = 0;
+		s->data[i].prev = 0;
+	}
+	/* the flag to clear is the atomic member, not the source itself */
+	atomic_set(&s->active, 0);
+	mutex_unlock(&s->lock);
+}
+
+/*
+ * One-time setup of source @s: store its type, notifier callback and number
+ * of values, initialise its lock and clear its data.
+ */
+static void init_source(struct power_clk_source *s,
+			notifier_call_ft notifier,
+			int nr_values,
+			int type)
+{
+	/* the lock must exist before reset_data() takes it */
+	mutex_init(&s->lock);
+
+	s->nr = nr_values;
+	s->type = type;
+	s->nb.notifier_call = notifier;
+
+	reset_data(s);
+}
+
+/*
+ * Work handler scheduled by the polling timer.  Without CONFIG_COMMON_CLK it
+ * refreshes the gpu and emc readings and emits a sample if anything changed;
+ * with CONFIG_COMMON_CLK it does nothing.
+ */
+static void
+power_clk_work_func(struct work_struct *dummy)
+{
+#ifndef CONFIG_COMMON_CLK
+	read_source(&power_ctx.gpu);
+	read_source(&power_ctx.emc);
+
+	check_clks();
+#endif
+}
+
+static DECLARE_WORK(power_clk_work, power_clk_work_func);
+
+/*
+ * Timer handler: schedule the polling work and re-arm the timer one period
+ * (power_ctx.period milliseconds) from now.  @data is unused.
+ */
+static void power_clk_timer(unsigned long data)
+{
+	struct timer_list *t = &power_ctx.timer;
+
+	schedule_work(&power_clk_work);
+
+	t->expires = jiffies + msecs_to_jiffies(power_ctx.period);
+	add_timer(t);
+}
+
+/*
+ * Store the current polling period in *@period.
+ * Returns 1 when the configured power_rate_freq is non-zero, 0 otherwise.
+ */
+int quadd_power_clk_is_enabled(int *period)
+{
+	*period = power_ctx.period;
+
+	return power_ctx.quadd_ctx->param.power_rate_freq != 0;
+}
+
+/*
+ * Look up the clock @dev_id/@con_id for source @s and activate the source.
+ *
+ * With CONFIG_COMMON_CLK the source's notifier block is registered on the
+ * clock.  The reference taken by clk_get_sys() is dropped again in every
+ * case; @name is only used in the error message.
+ *
+ * Returns 0 on success and also when the clock cannot be looked up (the
+ * source is then left inactive), or the negative error returned by
+ * clk_notifier_register().
+ */
+static int power_clk_setup_clk(struct power_clk_source *s,
+			       const char *dev_id, const char *con_id,
+			       const char *name)
+{
+	int status = 0;
+
+	s->clkp = clk_get_sys(dev_id, con_id);
+	if (IS_ERR_OR_NULL(s->clkp)) {
+		/* clk_get_sys() fails with an ERR_PTR value, not NULL */
+		pr_err("error: could not setup %s freq\n", name);
+		s->clkp = NULL;
+		atomic_set(&s->active, 0);
+		return 0;
+	}
+
+#ifdef CONFIG_COMMON_CLK
+	status = clk_notifier_register(s->clkp, &s->nb);
+#endif
+	clk_put(s->clkp);
+
+	if (status < 0) {
+		pr_err("error: could not setup %s freq\n", name);
+		return status;
+	}
+
+	reset_data(s);
+	atomic_set(&s->active, 1);
+
+	return 0;
+}
+
+/*
+ * Start power/clock sampling.
+ *
+ * Does nothing (and returns 0) when the configured power_rate_freq is 0.
+ * Otherwise the gpu and emc sources are set up, the cpu notifier is
+ * registered and, when the polling period is non-zero, the polling timer is
+ * armed.  The period is 0 with CONFIG_COMMON_CLK and
+ * MSEC_PER_SEC / power_rate_freq otherwise.
+ *
+ * Returns 0 on success or the negative error of the failing registration.
+ *
+ * NOTE(review): the cpu source is hooked up with register_cpu_notifier()
+ * although the comment below speaks of a cpu frequency notifier - confirm
+ * this is the intended notifier chain.
+ */
+int quadd_power_clk_start(void)
+{
+	struct power_clk_source *s;
+	int status;
+	struct timer_list *timer = &power_ctx.timer;
+	struct quadd_parameters *param = &power_ctx.quadd_ctx->param;
+
+	if (param->power_rate_freq == 0) {
+		pr_info("power_clk is not started\n");
+		return 0;
+	}
+
+#ifdef CONFIG_COMMON_CLK
+	power_ctx.period = 0;
+#else
+	power_ctx.period = MSEC_PER_SEC / param->power_rate_freq;
+#endif
+	pr_info("power_clk: start, freq: %d\n",
+		param->power_rate_freq);
+
+	/* setup gpu frequency */
+	status = power_clk_setup_clk(&power_ctx.gpu, "3d", NULL, "gpu");
+	if (status < 0)
+		return status;
+
+	/* setup emc frequency */
+	status = power_clk_setup_clk(&power_ctx.emc, "cpu", "emc", "emc");
+	if (status < 0)
+		return status;
+
+	/* setup cpu frequency notifier */
+	s = &power_ctx.cpu;
+	status = register_cpu_notifier(&s->nb);
+	if (status < 0) {
+		pr_err("error: could not setup cpu freq\n");
+		return status;
+	}
+	reset_data(s);
+
+	if (power_ctx.period > 0) {
+		init_timer(timer);
+		timer->function = power_clk_timer;
+		timer->expires = jiffies + msecs_to_jiffies(power_ctx.period);
+		timer->data = 0;
+		add_timer(timer);
+	}
+
+	atomic_set(&s->active, 1);
+
+	return 0;
+}
+
+/*
+ * Stop power/clock sampling.
+ *
+ * Does nothing when the configured power_rate_freq is 0.  Otherwise the
+ * polling timer is deleted and the work it may have queued is cancelled, so
+ * that no reading or sample is produced after this function returns.  Each
+ * source that was active is marked inactive and has its notifier
+ * unregistered (gpu/emc only with CONFIG_COMMON_CLK).
+ */
+void quadd_power_clk_stop(void)
+{
+	struct power_clk_source *s;
+
+	if (power_ctx.quadd_ctx->param.power_rate_freq == 0)
+		return;
+
+	if (power_ctx.period > 0) {
+		del_timer_sync(&power_ctx.timer);
+		/* the last timer run may have left the work queued */
+		cancel_work_sync(&power_clk_work);
+	}
+
+	s = &power_ctx.gpu;
+	if (atomic_cmpxchg(&s->active, 1, 0)) {
+#ifdef CONFIG_COMMON_CLK
+		if (s->clkp)
+			clk_notifier_unregister(s->clkp, &s->nb);
+#endif
+	}
+
+	s = &power_ctx.emc;
+	if (atomic_cmpxchg(&s->active, 1, 0)) {
+#ifdef CONFIG_COMMON_CLK
+		if (s->clkp)
+			clk_notifier_unregister(s->clkp, &s->nb);
+#endif
+	}
+
+	s = &power_ctx.cpu;
+	if (atomic_cmpxchg(&s->active, 1, 0)) {
+		pr_info("power_clk: stop\n");
+		unregister_cpu_notifier(&s->nb);
+	}
+}
+
+/*
+ * Prepare the three frequency sources and remember the profiler context.
+ * The cpu source holds nr_cpu_ids values, the gpu and emc sources one each.
+ * Always returns 0.
+ */
+int quadd_power_clk_init(struct quadd_ctx *quadd_ctx)
+{
+	pr_info("power_clk: init\n");
+
+	init_source(&power_ctx.cpu, cpu_notifier_call,
+		    nr_cpu_ids, QUADD_POWER_CLK_CPU);
+	init_source(&power_ctx.gpu, gpu_notifier_call,
+		    1, QUADD_POWER_CLK_GPU);
+	init_source(&power_ctx.emc, emc_notifier_call,
+		    1, QUADD_POWER_CLK_EMC);
+
+	power_ctx.quadd_ctx = quadd_ctx;
+
+	return 0;
+}
+
+/* Tear-down hook: logs the event and stops sampling. */
+void quadd_power_clk_deinit(void)
+{
+	pr_info("power_clk: deinit\n");
+	quadd_power_clk_stop();
+}
diff --git a/drivers/misc/tegra-profiler/power_clk.h b/drivers/misc/tegra-profiler/power_clk.h
new file mode 100644
index 000000000000..6854687392f3
--- /dev/null
+++ b/drivers/misc/tegra-profiler/power_clk.h
@@ -0,0 +1,30 @@
+/*
+ * drivers/misc/tegra-profiler/power_clk.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_POWER_CLK_H
+#define __QUADD_POWER_CLK_H
+
+struct quadd_ctx;
+
+void quadd_power_clk_deinit(void);
+int quadd_power_clk_init(struct quadd_ctx *quadd_ctx);
+
+int quadd_power_clk_start(void);
+void quadd_power_clk_stop(void);
+
+int quadd_power_clk_is_enabled(int *period);
+
+#endif /* __QUADD_POWER_CLK_H */
diff --git a/drivers/misc/tegra-profiler/quadd.h b/drivers/misc/tegra-profiler/quadd.h
new file mode 100644
index 000000000000..743482dd33b4
--- /dev/null
+++ b/drivers/misc/tegra-profiler/quadd.h
@@ -0,0 +1,62 @@
+/*
+ * drivers/misc/tegra-profiler/quadd.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_H
+#define __QUADD_H
+
+#include <linux/tegra_profiler.h>
+
+/* #define QUADD_USE_EMULATE_COUNTERS	1 */
+
+struct event_data;
+struct quadd_comm_data_interface;
+struct quadd_hrt_ctx;
+struct quadd_mmap_ctx;
+
+/*
+ * Operations exported by an event source; struct quadd_ctx holds one such
+ * interface for the PMU and one for the PL310.
+ */
+struct quadd_event_source_interface {
+	int (*enable)(void);
+	void (*disable)(void);
+	void (*start)(void);
+	void (*stop)(void);
+	/* fill @events with the current readings */
+	int (*read)(struct event_data *events);
+	/* select the @size events listed in @events */
+	int (*set_events)(int *events, int size);
+	/* write the supported event ids to @events, return how many */
+	int (*get_supported_events)(int *events);
+};
+
+/* Per-source bookkeeping kept in struct quadd_ctx. */
+struct source_info {
+	/* event ids filled in by the source's get_supported_events() */
+	int supported_events[QUADD_MAX_COUNTERS];
+	/* number of valid entries in supported_events[] */
+	int nr_supported_events;
+
+	int active;
+};
+
+/* Top-level profiler context shared by the subsystems. */
+struct quadd_ctx {
+	struct quadd_parameters param;
+
+	/* PMU event source and its supported-event list */
+	struct quadd_event_source_interface *pmu;
+	struct source_info pmu_info;
+
+	/* PL310 event source (NULL when not found) and its event list */
+	struct quadd_event_source_interface *pl310;
+	struct source_info pl310_info;
+
+	/* handles returned by the comm, hrt and mmap init routines */
+	struct quadd_comm_data_interface *comm;
+	struct quadd_hrt_ctx *hrt;
+	struct quadd_mmap_ctx *mmap;
+
+	atomic_t started;
+};
+
+#endif	/* __QUADD_H */
diff --git a/drivers/misc/tegra-profiler/tegra.h b/drivers/misc/tegra-profiler/tegra.h
new file mode 100644
index 000000000000..013c5abd644f
--- /dev/null
+++ b/drivers/misc/tegra-profiler/tegra.h
@@ -0,0 +1,48 @@
+/*
+ * drivers/misc/tegra-profiler/tegra.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_TEGRA_H
+#define __QUADD_TEGRA_H
+
+#include <linux/smp.h>
+
+#ifdef CONFIG_TEGRA_CLUSTER_CONTROL
+#include <linux/io.h>
+#include <../../mach-tegra/pm.h>
+#endif
+
+/*
+ * Returns smp_processor_id(); with CONFIG_TEGRA_CLUSTER_CONTROL the
+ * QM_TEGRA_POWER_CLUSTER_LP flag is OR-ed in while is_lp_cluster() is true.
+ */
+static inline int quadd_get_processor_id(void)
+{
+	int id = smp_processor_id();
+
+#ifdef CONFIG_TEGRA_CLUSTER_CONTROL
+	if (is_lp_cluster())
+		id |= QM_TEGRA_POWER_CLUSTER_LP;
+#endif
+
+	return id;
+}
+
+/* Returns 1 when built with CONFIG_TEGRA_CLUSTER_CONTROL, 0 otherwise. */
+static inline int quadd_is_cpu_with_lp_cluster(void)
+{
+#ifdef CONFIG_TEGRA_CLUSTER_CONTROL
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+#endif  /* __QUADD_TEGRA_H */
diff --git a/drivers/misc/tegra-profiler/version.h b/drivers/misc/tegra-profiler/version.h
new file mode 100644
index 000000000000..b5cf277a24b3
--- /dev/null
+++ b/drivers/misc/tegra-profiler/version.h
@@ -0,0 +1,23 @@
+/*
+ * drivers/misc/tegra-profiler/version.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_VERSION_H
+#define __QUADD_VERSION_H
+
+#define QUADD_MODULE_VERSION		"1.23"
+#define QUADD_MODULE_BRANCH		"Dev"
+
+#endif	/* __QUADD_VERSION_H */
author	Igor Nabirushkin <inabirushkin@nvidia.com>	2013-07-18 21:42:07 +0400
committer	Gabby Lee <galee@nvidia.com>	2013-08-20 18:25:26 -0700
commit	0a2223fea024a7a1861fd7ba5a7cef909e8f3dbc (patch)
tree	ba8f504b3e0dea9bc90008dc3f70cbfa871ace29 /drivers
parent	df842a2215742f2f0f0034761dec3de0a5efa048 (diff)