diff options
author | Igor Nabirushkin <inabirushkin@nvidia.com> | 2013-07-18 21:42:07 +0400 |
---|---|---|
committer | Gabby Lee <galee@nvidia.com> | 2013-08-20 18:25:26 -0700 |
commit | 0a2223fea024a7a1861fd7ba5a7cef909e8f3dbc (patch) | |
tree | ba8f504b3e0dea9bc90008dc3f70cbfa871ace29 /drivers | |
parent | df842a2215742f2f0f0034761dec3de0a5efa048 (diff) |
ARM: tegra: Add Tegra Profiler
Add Tegra Profiler kernel misc driver
Bug 1312406
Change-Id: Ie5715bd0029a4d06fb20fa4ad8724827411a6c3b
Signed-off-by: Igor Nabirushkin <inabirushkin@nvidia.com>
Reviewed-on: http://git-master/r/250840
GVS: Gerrit_Virtual_Submit
Reviewed-by: Daniel Horowitz <dhorowitz@nvidia.com>
Tested-by: Daniel Horowitz <dhorowitz@nvidia.com>
Reviewed-by: Gabby Lee <galee@nvidia.com>
Diffstat (limited to 'drivers')
28 files changed, 4959 insertions, 0 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 9d43f29d3cbe..a8d12e246938 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -616,4 +616,5 @@ source "drivers/misc/carma/Kconfig" source "drivers/misc/altera-stapl/Kconfig" source "drivers/misc/tegra-baseband/Kconfig" source "drivers/misc/tegra-cec/Kconfig" +source "drivers/misc/tegra-profiler/Kconfig" endmenu diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 536df7e36498..4403b1ec0ea1 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -69,3 +69,4 @@ obj-$(CONFIG_SND_SOC_TEGRA_RT5640) += tfa9887.o obj-$(CONFIG_FAN_THERM_EST) += therm_fan_est.o obj-$(CONFIG_BLUEDROID_PM) += bluedroid_pm.o obj-$(CONFIG_CPULOAD_MONITOR) += cpuload.o +obj-$(CONFIG_TEGRA_PROFILER) += tegra-profiler/ diff --git a/drivers/misc/tegra-profiler/Kconfig b/drivers/misc/tegra-profiler/Kconfig new file mode 100644 index 000000000000..312b75e9b848 --- /dev/null +++ b/drivers/misc/tegra-profiler/Kconfig @@ -0,0 +1,6 @@ +config TEGRA_PROFILER + bool "Enable Tegra profiler" + depends on ARCH_TEGRA + select KPROBES + help + This option enables Tegra profiler diff --git a/drivers/misc/tegra-profiler/Makefile b/drivers/misc/tegra-profiler/Makefile new file mode 100644 index 000000000000..7b3d8088aa2f --- /dev/null +++ b/drivers/misc/tegra-profiler/Makefile @@ -0,0 +1,18 @@ +# +# drivers/misc/tegra-profiler/Makefile +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. 
+# + +obj-$(CONFIG_TEGRA_PROFILER) += tegra-profiler.o +tegra-profiler-objs := main.o armv7_pmu.o pl310.o hrt.o comm.o mmap.o backtrace.o debug.o ma.o power_clk.o auth.o + diff --git a/drivers/misc/tegra-profiler/armv7_pmu.c b/drivers/misc/tegra-profiler/armv7_pmu.c new file mode 100644 index 000000000000..04436f8c7e02 --- /dev/null +++ b/drivers/misc/tegra-profiler/armv7_pmu.c @@ -0,0 +1,487 @@ +/* + * drivers/misc/tegra-profiler/armv7_pmu.c + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <asm/cputype.h> +#include <asm/pmu.h> + +#include <linux/tegra_profiler.h> + +#include "armv7_pmu.h" +#include "quadd.h" +#include "debug.h" + +static struct armv7_pmu_ctx pmu_ctx; + +DEFINE_PER_CPU(u32[QUADD_MAX_PMU_COUNTERS], pmu_prev_val); + +static unsigned quadd_armv7_a9_events_map[QUADD_EVENT_TYPE_MAX] = { + [QUADD_EVENT_TYPE_INSTRUCTIONS] = + QUADD_ARMV7_A9_HW_EVENT_INST_OUT_OF_RENAME_STAGE, + [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] = + QUADD_ARMV7_HW_EVENT_PC_WRITE, + [QUADD_EVENT_TYPE_BRANCH_MISSES] = + QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED, + [QUADD_EVENT_TYPE_BUS_CYCLES] = + QUADD_ARMV7_HW_EVENT_CLOCK_CYCLES, + + [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] = + QUADD_ARMV7_HW_EVENT_DCACHE_REFILL, + [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] = + QUADD_ARMV7_HW_EVENT_DCACHE_REFILL, + [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] = + QUADD_ARMV7_HW_EVENT_IFETCH_MISS, + + [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] = + 
QUADD_ARMV7_UNSUPPORTED_EVENT, + [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] = + QUADD_ARMV7_UNSUPPORTED_EVENT, + [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] = + QUADD_ARMV7_UNSUPPORTED_EVENT, +}; + +static unsigned quadd_armv7_a15_events_map[QUADD_EVENT_TYPE_MAX] = { + [QUADD_EVENT_TYPE_INSTRUCTIONS] = + QUADD_ARMV7_HW_EVENT_INSTR_EXECUTED, + [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] = + QUADD_ARMV7_A15_HW_EVENT_SPEC_PC_WRITE, + [QUADD_EVENT_TYPE_BRANCH_MISSES] = + QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED, + [QUADD_EVENT_TYPE_BUS_CYCLES] = QUADD_ARMV7_HW_EVENT_BUS_CYCLES, + + [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] = + QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_REFILL, + [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] = + QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_REFILL, + [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] = + QUADD_ARMV7_HW_EVENT_IFETCH_MISS, + + [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] = + QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_REFILL, + [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] = + QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_REFILL, + [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] = + QUADD_ARMV7_UNSUPPORTED_EVENT, +}; + +static u32 armv7_pmu_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); + return val; +} + +static void armv7_pmu_pmnc_write(u32 val) +{ + val &= QUADD_ARMV7_PMNC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); +} + +static void armv7_pmu_pmnc_enable_counter(int index) +{ + u32 val; + + if (index == QUADD_ARMV7_CYCLE_COUNTER) + val = QUADD_ARMV7_CCNT; + else + val = 1 << index; + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); +} + +static void armv7_pmu_select_counter(unsigned int idx) +{ + u32 val; + + val = idx & QUADD_ARMV7_SELECT_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); +} + +static u32 armv7_pmu_adjust_value(u32 value, int event_id) +{ + /* + * Cortex A8/A9: l1 cache performance counters + * don't differentiate between read and write data accesses/misses, + * so currently we are 
devided by two + */ + if (pmu_ctx.l1_cache_rw && + (pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A8 || + pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A9) && + (event_id == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES || + event_id == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)) { + return value / 2; + } + return value; +} + +static u32 armv7_pmu_read_counter(int idx) +{ + u32 val = 0; + + if (idx == QUADD_ARMV7_CYCLE_COUNTER) { + /* Cycle count register (PMCCNTR) reading */ + asm volatile ("MRC p15, 0, %0, c9, c13, 0" : "=r"(val)); + } else { + /* counter selection*/ + armv7_pmu_select_counter(idx); + /* event count register reading */ + asm volatile ("MRC p15, 0, %0, c9, c13, 2" : "=r"(val)); + } + + return val; +} + +static __attribute__((unused)) void armv7_pmu_write_counter(int idx, u32 value) +{ + if (idx == QUADD_ARMV7_CYCLE_COUNTER) { + /* Cycle count register (PMCCNTR) writing */ + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); + } else { + /* counter selection*/ + armv7_pmu_select_counter(idx); + /* event count register writing */ + asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value)); + } +} + +static void armv7_pmu_event_select(u32 event) +{ + event &= QUADD_ARMV7_EVTSEL_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (event)); +} + +static __attribute__((unused)) void armv7_pmnc_enable_interrupt(int idx) +{ + u32 val; + + if (idx == QUADD_ARMV7_CYCLE_COUNTER) + val = QUADD_ARMV7_CCNT; + else + val = 1 << idx; + + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); +} + +static __attribute__((unused)) void armv7_pmnc_disable_interrupt(int idx) +{ + u32 val; + + if (idx == QUADD_ARMV7_CYCLE_COUNTER) + val = QUADD_ARMV7_CCNT; + else + val = 1 << idx; + + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); +} + +static void armv7_pmnc_disable_all_interrupts(void) +{ + u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask; + + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); +} + +static void 
armv7_pmnc_reset_overflow_flags(void) +{ + u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask; + + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); +} + +static inline void select_event(unsigned int idx, unsigned int event) +{ + /* counter selection */ + armv7_pmu_select_counter(idx); + armv7_pmu_event_select(event); +} + +static inline void disable_all_counters(void) +{ + u32 val; + + /* Disable all counters */ + val = armv7_pmu_pmnc_read(); + if (val & QUADD_ARMV7_PMNC_E) + armv7_pmu_pmnc_write(val & ~QUADD_ARMV7_PMNC_E); +} + +static inline void enable_all_counters(void) +{ + u32 val; + + /* Enable all counters */ + val = armv7_pmu_pmnc_read(); + val |= QUADD_ARMV7_PMNC_E | QUADD_ARMV7_PMNC_X; + armv7_pmu_pmnc_write(val); +} + +static inline void quadd_init_pmu(void) +{ + armv7_pmnc_reset_overflow_flags(); + armv7_pmnc_disable_all_interrupts(); +} + +static inline void reset_all_counters(void) +{ + u32 val; + + val = armv7_pmu_pmnc_read(); + val |= QUADD_ARMV7_PMNC_P | QUADD_ARMV7_PMNC_C; + armv7_pmu_pmnc_write(val); +} + +static int pmu_enable(void) +{ + int err; + + err = reserve_pmu(ARM_PMU_DEVICE_CPU); + if (err) { + pr_err("error: pmu was not reserved\n"); + return err; + } + pr_info("pmu was reserved\n"); + return 0; +} + +static void pmu_disable(void) +{ + release_pmu(ARM_PMU_DEVICE_CPU); + pr_info("pmu was released\n"); +} + +static void pmu_start(void) +{ + int i, idx; + u32 event; + u32 *prevp = __get_cpu_var(pmu_prev_val); + + disable_all_counters(); + quadd_init_pmu(); + + for (i = 0; i < pmu_ctx.nr_used_counters; i++) { + struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i]; + + prevp[i] = 0; + + event = pmu_event->hw_value; + idx = pmu_event->counter_idx; + + if (idx != QUADD_ARMV7_CYCLE_COUNTER) + select_event(idx, event); + + armv7_pmu_pmnc_enable_counter(idx); + } + + reset_all_counters(); + enable_all_counters(); + + qm_debug_start_source(QUADD_EVENT_SOURCE_PMU); +} + +static void pmu_stop(void) +{ + reset_all_counters(); + 
disable_all_counters(); + + qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU); +} + +static int __maybe_unused pmu_read(struct event_data *events) +{ + int idx, i; + u32 val; + u32 *prevp = __get_cpu_var(pmu_prev_val); + + if (pmu_ctx.nr_used_counters == 0) { + pr_warn_once("error: counters were not initialized\n"); + return 0; + } + + for (i = 0; i < pmu_ctx.nr_used_counters; i++) { + struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i]; + + idx = pmu_event->counter_idx; + + val = armv7_pmu_read_counter(idx); + val = armv7_pmu_adjust_value(val, pmu_event->quadd_event_id); + + events[i].event_source = QUADD_EVENT_SOURCE_PMU; + events[i].event_id = pmu_event->quadd_event_id; + + events[i].val = val; + events[i].prev_val = prevp[i]; + + prevp[i] = val; + + qm_debug_read_counter(events[i].event_id, events[i].prev_val, + events[i].val); + } + + return pmu_ctx.nr_used_counters; +} + +static int __maybe_unused pmu_read_emulate(struct event_data *events) +{ + int i; + static u32 val = 100; + u32 *prevp = __get_cpu_var(pmu_prev_val); + + for (i = 0; i < pmu_ctx.nr_used_counters; i++) { + if (val > 200) + val = 100; + + events[i].event_id = prevp[i]; + events[i].val = val; + + val += 5; + } + + return pmu_ctx.nr_used_counters; +} + +static int set_events(int *events, int size) +{ + int i, nr_l1_r = 0, nr_l1_w = 0, curr_idx = 0; + + pmu_ctx.l1_cache_rw = 0; + pmu_ctx.nr_used_counters = 0; + + if (!events || size == 0) + return 0; + + if (size > QUADD_MAX_PMU_COUNTERS) { + pr_err("Too many events (> %d)\n", QUADD_MAX_PMU_COUNTERS); + return -ENOSPC; + } + + if (!pmu_ctx.current_map) { + pr_err("Invalid current_map\n"); + return -ENODEV; + } + + for (i = 0; i < size; i++) { + struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i]; + + if (events[i] > QUADD_EVENT_TYPE_MAX) { + pr_err("Error event: %d\n", events[i]); + return -EINVAL; + } + + if (curr_idx >= pmu_ctx.nr_counters) { + pr_err("Too many events (> %d)\n", + pmu_ctx.nr_counters); + return -ENOSPC; + } + 
+ if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) { + pmu_event->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT; + pmu_event->counter_idx = QUADD_ARMV7_CYCLE_COUNTER; + } else { + pmu_event->hw_value = pmu_ctx.current_map[events[i]]; + pmu_event->counter_idx = curr_idx++; + } + pmu_event->quadd_event_id = events[i]; + + if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES) + nr_l1_r++; + else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES) + nr_l1_w++; + + pr_info("Event has been added: id/pmu value: %s/%#x\n", + quadd_get_event_str(events[i]), + pmu_event->hw_value); + } + pmu_ctx.nr_used_counters = size; + + if (nr_l1_r > 0 && nr_l1_w > 0) + pmu_ctx.l1_cache_rw = 1; + + return 0; +} + +static int get_supported_events(int *events) +{ + int i, nr_events = 0; + + for (i = 0; i < QUADD_EVENT_TYPE_MAX; i++) { + if (pmu_ctx.current_map[i] != QUADD_ARMV7_UNSUPPORTED_EVENT) + events[nr_events++] = i; + } + return nr_events; +} + +static struct quadd_event_source_interface pmu_armv7_int = { + .enable = pmu_enable, + .disable = pmu_disable, + + .start = pmu_start, + .stop = pmu_stop, + +#ifndef QUADD_USE_EMULATE_COUNTERS + .read = pmu_read, +#else + .read = pmu_read_emulate, +#endif + .set_events = set_events, + .get_supported_events = get_supported_events, +}; + +struct quadd_event_source_interface *quadd_armv7_pmu_init(void) +{ + struct quadd_event_source_interface *pmu = NULL; + unsigned long cpu_id, cpu_implementer, part_number; + + cpu_id = read_cpuid_id(); + cpu_implementer = cpu_id >> 24; + part_number = cpu_id & 0xFFF0; + + if (cpu_implementer == QUADD_ARM_CPU_IMPLEMENTER) { + switch (part_number) { + case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9: + pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A9; + strcpy(pmu_ctx.arch_name, "Cortex A9"); + pmu_ctx.nr_counters = 6; + pmu_ctx.counters_mask = + QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9; + pmu_ctx.current_map = quadd_armv7_a9_events_map; + pmu = &pmu_armv7_int; + break; + + case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15: + 
pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A15; + strcpy(pmu_ctx.arch_name, "Cortex A15"); + pmu_ctx.nr_counters = 6; + pmu_ctx.counters_mask = + QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15; + pmu_ctx.current_map = quadd_armv7_a15_events_map; + pmu = &pmu_armv7_int; + break; + + default: + pmu_ctx.arch = QUADD_ARM_CPU_TYPE_UNKNOWN; + strcpy(pmu_ctx.arch_name, "Unknown"); + pmu_ctx.nr_counters = 0; + pmu_ctx.current_map = NULL; + break; + } + } + + pr_info("arch: %s, number of counters: %d\n", + pmu_ctx.arch_name, pmu_ctx.nr_counters); + return pmu; +} diff --git a/drivers/misc/tegra-profiler/armv7_pmu.h b/drivers/misc/tegra-profiler/armv7_pmu.h new file mode 100644 index 000000000000..827fe4292a33 --- /dev/null +++ b/drivers/misc/tegra-profiler/armv7_pmu.h @@ -0,0 +1,269 @@ +/* + * drivers/misc/tegra-profiler/armv7_pmu.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#ifndef __ARMV7_PMU_H +#define __ARMV7_PMU_H + +#define QUADD_ARM_CPU_IMPLEMENTER 0x41 + +enum { + QUADD_ARM_CPU_TYPE_UNKNOWN, + QUADD_ARM_CPU_TYPE_CORTEX_A5, + QUADD_ARM_CPU_TYPE_CORTEX_A8, + QUADD_ARM_CPU_TYPE_CORTEX_A9, + QUADD_ARM_CPU_TYPE_CORTEX_A15, +}; + +#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A5 0xC050 +#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A8 0xC080 +#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9 0xC090 +#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15 0xC0F0 + + +#define QUADD_MAX_PMU_COUNTERS 32 + +struct quadd_pmu_event_info { + int quadd_event_id; + int hw_value; + int counter_idx; +}; + +struct armv7_pmu_ctx { + int arch; + char arch_name[32]; + + int nr_counters; + u32 counters_mask; + + struct quadd_pmu_event_info pmu_events[QUADD_MAX_PMU_COUNTERS]; + int nr_used_counters; + + int l1_cache_rw; + int *current_map; +}; + +struct quadd_event_source_interface; + +extern struct quadd_event_source_interface *quadd_armv7_pmu_init(void); + +/* + * PMNC Register + */ + + /* 0/1: disables/enables all counters, including CCNT */ +#define QUADD_ARMV7_PMNC_E (1 << 0) +/* 1: Resets all performance counters to zero. */ +#define QUADD_ARMV7_PMNC_P (1 << 1) +/* 1: Resets cycle counter, CCNT, to zero. */ +#define QUADD_ARMV7_PMNC_C (1 << 2) +/* 0: counts every processor clock cycle, reset value. 1: + counts every 64th processor clock cycle. 
*/ +#define QUADD_ARMV7_PMNC_D (1 << 3) +/* 0/1: Export to ETM disabled/enabled */ +#define QUADD_ARMV7_PMNC_X (1 << 4) +/* 0/1: count is disabled/enabled in regions where + non-invasive debug is prohibited */ +#define QUADD_ARMV7_PMNC_DP (1 << 5) +/* Mask for writable bits */ +#define QUADD_ARMV7_PMNC_MASK 0x3f + + +#define QUADD_ARMV7_CCNT (1 << 31) /* Cycle counter */ + +#define QUADD_ARMV7_CYCLE_COUNTER -1 + +/* + * CNTENS: counters enable reg + */ +#define QUADD_ARMV7_CNTENS_P(i) (1 << i) +#define QUADD_ARMV7_CNTENS_C (1 << QUADD_ARMV7_CCNT) + +/* + * CNTENC: counters disable reg + */ +#define QUADD_ARMV7_CNTENC_P(i) (1 << i) +#define QUADD_ARMV7_CNTENC_C (1 << QUADD_ARMV7_CCNT) + +/* + * Performance Counter Selection Register mask + */ +#define QUADD_ARMV7_SELECT_MASK 0x1f + +/* + * EVTSEL Register mask + */ +#define QUADD_ARMV7_EVTSEL_MASK 0xff + +#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A5 0x03 +#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A8 0x0f +#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9 0x3f +#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15 0x3f + +enum quadd_armv7_common_events { + QUADD_ARMV7_HW_EVENT_PMNC_SW_INCR = 0x00, + QUADD_ARMV7_HW_EVENT_IFETCH_MISS = 0x01, + QUADD_ARMV7_HW_EVENT_ITLB_MISS = 0x02, + QUADD_ARMV7_HW_EVENT_DCACHE_REFILL = 0x03, + QUADD_ARMV7_HW_EVENT_DCACHE_ACCESS = 0x04, + QUADD_ARMV7_HW_EVENT_DTLB_REFILL = 0x05, + QUADD_ARMV7_HW_EVENT_DREAD = 0x06, + QUADD_ARMV7_HW_EVENT_DWRITE = 0x07, + QUADD_ARMV7_HW_EVENT_INSTR_EXECUTED = 0x08, + QUADD_ARMV7_HW_EVENT_EXC_TAKEN = 0x09, + QUADD_ARMV7_HW_EVENT_EXC_EXECUTED = 0x0A, + QUADD_ARMV7_HW_EVENT_CID_WRITE = 0x0B, + QUADD_ARMV7_HW_EVENT_PC_WRITE = 0x0C, + QUADD_ARMV7_HW_EVENT_PC_IMM_BRANCH = 0x0D, + QUADD_ARMV7_HW_EVENT_PC_PROC_RETURN = 0x0E, + QUADD_ARMV7_HW_EVENT_UNALIGNED_ACCESS = 0x0F, + + QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED = 0x10, + QUADD_ARMV7_HW_EVENT_CLOCK_CYCLES = 0x11, + QUADD_ARMV7_HW_EVENT_PC_BRANCH_PRED = 0x12, + QUADD_ARMV7_HW_EVENT_MEM_ACCESS = 0x13, + 
QUADD_ARMV7_HW_EVENT_L1_ICACHE_ACCESS = 0x14, + QUADD_ARMV7_HW_EVENT_L1_DCACHE_WB = 0x15, + QUADD_ARMV7_HW_EVENT_L2_DCACHE_ACCESS = 0x16, + QUADD_ARMV7_HW_EVENT_L2_DCACHE_REFILL = 0x17, + QUADD_ARMV7_HW_EVENT_L2_DCACHE_WB = 0x18, + QUADD_ARMV7_HW_EVENT_BUS_ACCESS = 0x19, + QUADD_ARMV7_HW_EVENT_MEMORY_ERROR = 0x1A, + QUADD_ARMV7_HW_EVENT_INSTR_SPEC = 0x1B, + QUADD_ARMV7_HW_EVENT_TTBR_WRITE = 0x1C, + QUADD_ARMV7_HW_EVENT_BUS_CYCLES = 0x1D, +}; + +enum quadd_armv7_a8_specific_events { + QUADD_ARMV7_A8_HW_EVENT_WRITE_BUFFER_FULL = 0x40, + QUADD_ARMV7_A8_HW_EVENT_L2_STORE_MERGED = 0x41, + QUADD_ARMV7_A8_HW_EVENT_L2_STORE_BUFF = 0x42, + QUADD_ARMV7_A8_HW_EVENT_L2_ACCESS = 0x43, + QUADD_ARMV7_A8_HW_EVENT_L2_CACH_MISS = 0x44, + QUADD_ARMV7_A8_HW_EVENT_AXI_READ_CYCLES = 0x45, + QUADD_ARMV7_A8_HW_EVENT_AXI_WRITE_CYCLES = 0x46, + QUADD_ARMV7_A8_HW_EVENT_MEMORY_REPLAY = 0x47, + QUADD_ARMV7_A8_HW_EVENT_UNALIGNED_ACCESS_REPLAY = 0x48, + QUADD_ARMV7_A8_HW_EVENT_L1_DATA_MISS = 0x49, + QUADD_ARMV7_A8_HW_EVENT_L1_INST_MISS = 0x4A, + QUADD_ARMV7_A8_HW_EVENT_L1_DATA_COLORING = 0x4B, + QUADD_ARMV7_A8_HW_EVENT_L1_NEON_DATA = 0x4C, + QUADD_ARMV7_A8_HW_EVENT_L1_NEON_CACH_DATA = 0x4D, + QUADD_ARMV7_A8_HW_EVENT_L2_NEON = 0x4E, + QUADD_ARMV7_A8_HW_EVENT_L2_NEON_HIT = 0x4F, + QUADD_ARMV7_A8_HW_EVENT_L1_INST = 0x50, + QUADD_ARMV7_A8_HW_EVENT_PC_RETURN_MIS_PRED = 0x51, + QUADD_ARMV7_A8_HW_EVENT_PC_BRANCH_FAILED = 0x52, + QUADD_ARMV7_A8_HW_EVENT_PC_BRANCH_TAKEN = 0x53, + QUADD_ARMV7_A8_HW_EVENT_PC_BRANCH_EXECUTED = 0x54, + QUADD_ARMV7_A8_HW_EVENT_OP_EXECUTED = 0x55, + QUADD_ARMV7_A8_HW_EVENT_CYCLES_INST_STALL = 0x56, + QUADD_ARMV7_A8_HW_EVENT_CYCLES_INST = 0x57, + QUADD_ARMV7_A8_HW_EVENT_CYCLES_NEON_DATA_STALL = 0x58, + QUADD_ARMV7_A8_HW_EVENT_CYCLES_NEON_INST_STALL = 0x59, + QUADD_ARMV7_A8_HW_EVENT_NEON_CYCLES = 0x5A, + + QUADD_ARMV7_A8_HW_EVENT_PMU0_EVENTS = 0x70, + QUADD_ARMV7_A8_HW_EVENT_PMU1_EVENTS = 0x71, + QUADD_ARMV7_A8_HW_EVENT_PMU_EVENTS = 0x72, +}; + +enum 
quadd_armv7_a9_specific_events { + QUADD_ARMV7_A9_HW_EVENT_JAVA_HW_BYTECODE_EXEC = 0x40, + QUADD_ARMV7_A9_HW_EVENT_JAVA_SW_BYTECODE_EXEC = 0x41, + QUADD_ARMV7_A9_HW_EVENT_JAZELLE_BRANCH_EXEC = 0x42, + + QUADD_ARMV7_A9_HW_EVENT_COHERENT_LINE_MISS = 0x50, + QUADD_ARMV7_A9_HW_EVENT_COHERENT_LINE_HIT = 0x51, + + QUADD_ARMV7_A9_HW_EVENT_ICACHE_DEP_STALL_CYCLES = 0x60, + QUADD_ARMV7_A9_HW_EVENT_DCACHE_DEP_STALL_CYCLES = 0x61, + QUADD_ARMV7_A9_HW_EVENT_TLB_MISS_DEP_STALL_CYCLES = 0x62, + QUADD_ARMV7_A9_HW_EVENT_STREX_EXECUTED_PASSED = 0x63, + QUADD_ARMV7_A9_HW_EVENT_STREX_EXECUTED_FAILED = 0x64, + QUADD_ARMV7_A9_HW_EVENT_DATA_EVICTION = 0x65, + QUADD_ARMV7_A9_HW_EVENT_ISSUE_STAGE_NO_INST = 0x66, + QUADD_ARMV7_A9_HW_EVENT_ISSUE_STAGE_EMPTY = 0x67, + QUADD_ARMV7_A9_HW_EVENT_INST_OUT_OF_RENAME_STAGE = 0x68, + + QUADD_ARMV7_A9_HW_EVENT_PREDICTABLE_FUNCT_RETURNS = 0x6E, + + QUADD_ARMV7_A9_HW_EVENT_MAIN_UNIT_EXECUTED_INST = 0x70, + QUADD_ARMV7_A9_HW_EVENT_SECOND_UNIT_EXECUTED_INST = 0x71, + QUADD_ARMV7_A9_HW_EVENT_LD_ST_UNIT_EXECUTED_INST = 0x72, + QUADD_ARMV7_A9_HW_EVENT_FP_EXECUTED_INST = 0x73, + QUADD_ARMV7_A9_HW_EVENT_NEON_EXECUTED_INST = 0x74, + + QUADD_ARMV7_A9_HW_EVENT_PLD_FULL_DEP_STALL_CYCLES = 0x80, + QUADD_ARMV7_A9_HW_EVENT_DATA_WR_DEP_STALL_CYCLES = 0x81, + QUADD_ARMV7_A9_HW_EVENT_ITLB_MISS_DEP_STALL_CYCLES = 0x82, + QUADD_ARMV7_A9_HW_EVENT_DTLB_MISS_DEP_STALL_CYCLES = 0x83, + QUADD_ARMV7_A9_HW_EVENT_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, + QUADD_ARMV7_A9_HW_EVENT_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, + QUADD_ARMV7_A9_HW_EVENT_DMB_DEP_STALL_CYCLES = 0x86, + + QUADD_ARMV7_A9_HW_EVENT_INTGR_CLK_ENABLED_CYCLES = 0x8A, + QUADD_ARMV7_A9_HW_EVENT_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, + + QUADD_ARMV7_A9_HW_EVENT_ISB_INST = 0x90, + QUADD_ARMV7_A9_HW_EVENT_DSB_INST = 0x91, + QUADD_ARMV7_A9_HW_EVENT_DMB_INST = 0x92, + QUADD_ARMV7_A9_HW_EVENT_EXT_INTERRUPTS = 0x93, + + QUADD_ARMV7_A9_HW_EVENT_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, + 
QUADD_ARMV7_A9_HW_EVENT_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, + QUADD_ARMV7_A9_HW_EVENT_PLE_FIFO_FLUSH = 0xA2, + QUADD_ARMV7_A9_HW_EVENT_PLE_RQST_COMPLETED = 0xA3, + QUADD_ARMV7_A9_HW_EVENT_PLE_FIFO_OVERFLOW = 0xA4, + QUADD_ARMV7_A9_HW_EVENT_PLE_RQST_PROG = 0xA5 +}; + +enum quadd_armv7_a5_specific_events { + QUADD_ARMV7_A5_HW_EVENT_IRQ_TAKEN = 0x86, + QUADD_ARMV7_A5_HW_EVENT_FIQ_TAKEN = 0x87, + + QUADD_ARMV7_A5_HW_EVENT_EXT_MEM_RQST = 0xc0, + QUADD_ARMV7_A5_HW_EVENT_NC_EXT_MEM_RQST = 0xc1, + QUADD_ARMV7_A5_HW_EVENT_PREFETCH_LINEFILL = 0xc2, + QUADD_ARMV7_A5_HW_EVENT_PREFETCH_LINEFILL_DROP = 0xc3, + QUADD_ARMV7_A5_HW_EVENT_ENTER_READ_ALLOC = 0xc4, + QUADD_ARMV7_A5_HW_EVENT_READ_ALLOC = 0xc5, + + QUADD_ARMV7_A5_HW_EVENT_STALL_SB_FULL = 0xc9, +}; + +enum quadd_armv7_a15_specific_events { + QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_ACCESS = 0x40, + QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_ACCESS = 0x41, + QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_REFILL = 0x42, + QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_REFILL = 0x43, + + QUADD_ARMV7_A15_HW_EVENT_L1_DTLB_READ_REFILL = 0x4C, + QUADD_ARMV7_A15_HW_EVENT_L1_DTLB_WRITE_REFILL = 0x4D, + + QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_ACCESS = 0x50, + QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_ACCESS = 0x51, + QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_REFILL = 0x52, + QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_REFILL = 0x53, + + QUADD_ARMV7_A15_HW_EVENT_SPEC_PC_WRITE = 0x76, +}; + +#define QUADD_ARMV7_UNSUPPORTED_EVENT 0xff00 +#define QUADD_ARMV7_CPU_CYCLE_EVENT 0xffff + +void quadd_pmu_test(void); + +#endif /* __ARMV7_PMU_H */ diff --git a/drivers/misc/tegra-profiler/auth.c b/drivers/misc/tegra-profiler/auth.c new file mode 100644 index 000000000000..654f401b672a --- /dev/null +++ b/drivers/misc/tegra-profiler/auth.c @@ -0,0 +1,336 @@ +/* + * drivers/misc/tegra-profiler/auth.c + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/miscdevice.h> +#include <linux/fs.h> +#include <linux/wait.h> +#include <linux/sched.h> +#include <linux/uaccess.h> + +#include "auth.h" +#include "quadd.h" +#include "debug.h" + +#define QUADD_SECURITY_MAGIC_REQUEST 0x11112222 +#define QUADD_SECURITY_MAGIC_RESPONSE 0x33334444 + +#define QUADD_TIMEOUT 1000 /* msec */ + +enum { + QUADD_SECURITY_RESPONSE_ERROR = 0, + QUADD_SECURITY_RESPONSE_DEBUG_FLAG_ON = 1, + QUADD_SECURITY_RESPONSE_DEBUG_FLAG_OFF = 2, + QUADD_SECURITY_RESPONSE_PACKAGE_NOT_FOUND = 3, +}; + +enum { + QUADD_SECURITY_REQUEST_CMD_TEST_DEBUG_FLAG = 1, + QUADD_SECURITY_RESPONSE_CMD_TEST_DEBUG_FLAG = 2, +}; + +struct quadd_auth_data { + char package_name[QUADD_MAX_PACKAGE_NAME]; + + uid_t debug_app_uid; + int response_value; +}; + +static struct quadd_auth_context { + struct miscdevice misc_dev; + + atomic_t opened; + + wait_queue_head_t request_wait; + wait_queue_head_t response_wait; + + int request_ready; + int response_ready; + struct quadd_auth_data data; + struct mutex lock; + + unsigned int msg_id; + + struct quadd_ctx *quadd_ctx; +} auth_ctx; + +static inline void response_ready(void) +{ + auth_ctx.response_ready = 1; + wake_up_interruptible(&auth_ctx.response_wait); +} + +static inline void request_ready(void) +{ + auth_ctx.request_ready = 1; + wake_up_interruptible(&auth_ctx.request_wait); +} + +static int auth_open(struct inode *inode, struct file *file) +{ + struct quadd_auth_data *data = 
&auth_ctx.data; + + if (atomic_cmpxchg(&auth_ctx.opened, 0, 1)) { + pr_err("Error: auth file is already opened\n"); + return -EBUSY; + } + pr_info("auth is opened\n"); + + auth_ctx.request_ready = 0; + auth_ctx.response_ready = 0; + + mutex_lock(&auth_ctx.lock); + data->package_name[0] = '\0'; + data->debug_app_uid = 0; + data->response_value = 0; + mutex_unlock(&auth_ctx.lock); + + return 0; +} + +static int auth_release(struct inode *inode, struct file *file) +{ + pr_info("auth is released\n"); + atomic_set(&auth_ctx.opened, 0); + return 0; +} + +static ssize_t +auth_read(struct file *filp, + char __user *user_buf, + size_t length, + loff_t *offset) +{ + char buf[QUADD_MAX_PACKAGE_NAME + 4 * sizeof(u32)]; + int msg_length, err; + struct quadd_auth_data *data = &auth_ctx.data; + + wait_event_interruptible(auth_ctx.request_wait, auth_ctx.request_ready); + + mutex_lock(&auth_ctx.lock); + + ((u32 *)buf)[0] = QUADD_SECURITY_MAGIC_REQUEST; + ((u32 *)buf)[1] = ++auth_ctx.msg_id; + ((u32 *)buf)[2] = QUADD_SECURITY_REQUEST_CMD_TEST_DEBUG_FLAG; + ((u32 *)buf)[3] = strlen(data->package_name); + + strcpy(buf + 4 * sizeof(u32), data->package_name); + msg_length = strlen(data->package_name) + 4 * sizeof(u32); + + mutex_unlock(&auth_ctx.lock); + + err = copy_to_user(user_buf, buf, msg_length); + if (err != 0) { + pr_err("Error: copy to user: %d\n", err); + return err; + } + + pr_info("auth read, msg_length: %d\n", msg_length); + return msg_length; +} + +static ssize_t +auth_write(struct file *file, + const char __user *user_buf, + size_t count, + loff_t *ppos) +{ + int err; + char buf[5 * sizeof(u32)]; + u32 magic, response_cmd, response_value, length, uid, msg_id; + struct quadd_auth_data *data = &auth_ctx.data; + + pr_info("auth read, count: %d\n", count); + + mutex_lock(&auth_ctx.lock); + data->response_value = QUADD_SECURITY_RESPONSE_ERROR; + data->debug_app_uid = 0; + mutex_unlock(&auth_ctx.lock); + + if (count < 5 * sizeof(u32)) { + pr_err("Error count: %u\n", count); + 
response_ready(); + return -E2BIG; + } + + err = copy_from_user(buf, user_buf, 5 * sizeof(u32)); + if (err) { + pr_err("Error: copy from user: %d\n", err); + response_ready(); + return err; + } + + magic = ((u32 *)buf)[0]; + if (magic != QUADD_SECURITY_MAGIC_RESPONSE) { + pr_err("Error magic: %#x\n", magic); + response_ready(); + return -EINVAL; + } + + msg_id = ((u32 *)buf)[1]; + if (msg_id != auth_ctx.msg_id) { + pr_err("Error message id: %u\n", msg_id); + response_ready(); + return -EINVAL; + } + + response_cmd = ((u32 *)buf)[2]; + response_value = ((u32 *)buf)[3]; + length = ((u32 *)buf)[4]; + + switch (response_cmd) { + case QUADD_SECURITY_RESPONSE_CMD_TEST_DEBUG_FLAG: + if (length != 4) { + pr_err("Error: too long data: %u\n", length); + response_ready(); + return -E2BIG; + } + + err = get_user(uid, (u32 __user *)user_buf + 5); + if (err) { + pr_err("Error: copy from user: %d\n", err); + response_ready(); + return err; + } + + mutex_lock(&auth_ctx.lock); + data->response_value = response_value; + data->debug_app_uid = uid; + mutex_unlock(&auth_ctx.lock); + + pr_info("uid: %u, response_value: %u\n", + uid, response_value); + break; + + default: + pr_err("Error: invalid response command: %u\n", + response_cmd); + response_ready(); + return -EINVAL; + } + response_ready(); + + return count; +} + +static const struct file_operations auth_fops = { + .read = auth_read, + .write = auth_write, + .open = auth_open, + .release = auth_release, +}; + +int quadd_auth_check_debug_flag(const char *package_name) +{ + int uid, response_value; + struct quadd_auth_data *data = &auth_ctx.data; + int pkg_name_length; + + if (!package_name) + return -EINVAL; + + pkg_name_length = strlen(package_name); + if (pkg_name_length == 0 || + pkg_name_length > QUADD_MAX_PACKAGE_NAME) + return -EINVAL; + + if (atomic_read(&auth_ctx.opened) == 0) + return -EIO; + + mutex_lock(&auth_ctx.lock); + data->debug_app_uid = 0; + data->response_value = 0; + + strncpy(data->package_name, package_name, 
QUADD_MAX_PACKAGE_NAME); + mutex_unlock(&auth_ctx.lock); + + request_ready(); + + wait_event_interruptible_timeout(auth_ctx.response_wait, + auth_ctx.response_ready, + msecs_to_jiffies(QUADD_TIMEOUT)); + if (!auth_ctx.response_ready) { + pr_err("Error: Tegra profiler service did not answer\n"); + return -ETIMEDOUT; + } + + mutex_lock(&auth_ctx.lock); + uid = data->debug_app_uid; + response_value = data->response_value; + mutex_unlock(&auth_ctx.lock); + + switch (response_value) { + case QUADD_SECURITY_RESPONSE_DEBUG_FLAG_ON: + pr_info("package %s is debuggable, uid: %d\n", + package_name, uid); + return uid; + + case QUADD_SECURITY_RESPONSE_DEBUG_FLAG_OFF: + pr_info("package %s is not debuggable\n", + package_name); + return 0; + + case QUADD_SECURITY_RESPONSE_PACKAGE_NOT_FOUND: + pr_err("Error: package %s not found\n", package_name); + return -ESRCH; + + case QUADD_SECURITY_RESPONSE_ERROR: + default: + pr_err("Error: invalid response\n"); + return -EBADMSG; + } +} + +int quadd_auth_init(struct quadd_ctx *quadd_ctx) +{ + int err; + struct miscdevice *misc_dev = &auth_ctx.misc_dev; + + pr_info("auth: init\n"); + + misc_dev->minor = MISC_DYNAMIC_MINOR; + misc_dev->name = QUADD_AUTH_DEVICE_NAME; + misc_dev->fops = &auth_fops; + + err = misc_register(misc_dev); + if (err < 0) { + pr_err("Error: misc_register %d\n", err); + return err; + } + + init_waitqueue_head(&auth_ctx.request_wait); + init_waitqueue_head(&auth_ctx.response_wait); + + auth_ctx.request_ready = 0; + auth_ctx.response_ready = 0; + + atomic_set(&auth_ctx.opened, 0); + mutex_init(&auth_ctx.lock); + auth_ctx.msg_id = 0; + + auth_ctx.quadd_ctx = quadd_ctx; + return 0; +} + +void quadd_auth_deinit(void) +{ + struct miscdevice *misc_dev = &auth_ctx.misc_dev; + + pr_info("auth: deinit\n"); + misc_deregister(misc_dev); +} diff --git a/drivers/misc/tegra-profiler/auth.h b/drivers/misc/tegra-profiler/auth.h new file mode 100644 index 000000000000..aa810f2d5a63 --- /dev/null +++ 
b/drivers/misc/tegra-profiler/auth.h @@ -0,0 +1,27 @@ +/* + * drivers/misc/tegra-profiler/auth.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __QUADD_AUTH_H__ +#define __QUADD_AUTH_H__ + +struct quadd_ctx; + +int quadd_auth_check_debug_flag(const char *package_name); + +int quadd_auth_init(struct quadd_ctx *quadd_ctx); +void quadd_auth_deinit(void); + +#endif /* __QUADD_AUTH_H__ */ diff --git a/drivers/misc/tegra-profiler/backtrace.c b/drivers/misc/tegra-profiler/backtrace.c new file mode 100644 index 000000000000..dd7c67cded5a --- /dev/null +++ b/drivers/misc/tegra-profiler/backtrace.c @@ -0,0 +1,167 @@ +/* + * drivers/misc/tegra-profiler/backtrace.c + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <asm-generic/uaccess.h> + +#include <linux/tegra_profiler.h> + +#include "backtrace.h" + +#define QUADD_USER_SPACE_MIN_ADDR 0x8000 + +/* Append ip to the call chain; silently dropped once the chain is full. */ +static inline void +quadd_callchain_store(struct quadd_callchain *callchain_data, u32 ip) +{ + if (callchain_data->nr < QUADD_MAX_STACK_DEPTH) { + /* pr_debug("[%d] Add entry: %#llx\n", + callchain_data->nr, ip); */ + callchain_data->callchain[callchain_data->nr++] = ip; + } +} + +/* + * Check that one unsigned long starting at addr lies entirely inside vma. + * Returns 0 when it does, -EINVAL otherwise (including vma == NULL). + */ +static int +check_vma_address(unsigned long addr, struct vm_area_struct *vma) +{ + unsigned long start, end; + + if (vma) { + start = vma->vm_start; + end = vma->vm_end; + /* + * Compare the remaining room instead of computing addr + size: + * the sum wraps for addresses close to ULONG_MAX, which would + * let a bogus frame pointer read from the stack pass the check. + */ + if (addr >= start && addr < end && + end - addr >= sizeof(unsigned long)) + return 0; + } + return -EINVAL; +} + +/* + * Unwind one user stack frame starting at tail. + * + * The word at tail is treated as a saved frame pointer (clang layout) when + * it points into the stack VMA, otherwise as a saved lr (gcc layout, with + * the saved fp one word below). The return address is appended to the call + * chain. Returns the caller's frame pointer, or NULL when the frame cannot + * be read, looks invalid, or does not move towards higher addresses. + */ +static unsigned long __user * +user_backtrace(unsigned long __user *tail, + struct quadd_callchain *callchain_data, + struct vm_area_struct *stack_vma) +{ + unsigned long value, value_lr = 0, value_fp = 0; + unsigned long __user *fp_prev = NULL; + + if (check_vma_address((unsigned long)tail, stack_vma)) + return NULL; + + if (__copy_from_user_inatomic(&value, tail, sizeof(unsigned long))) + return NULL; + + if (!check_vma_address(value, stack_vma)) { + /* clang's frame */ + value_fp = value; + + if (check_vma_address((unsigned long)(tail + 1), stack_vma)) + return NULL; + + if (__copy_from_user_inatomic(&value_lr, tail + 1, + sizeof(unsigned long))) + return NULL; + } else { + /* gcc's frame */ + /* same bounds check as the clang path, for the word below tail */ + if (check_vma_address((unsigned long)(tail - 1), stack_vma)) + return NULL; + + if (__copy_from_user_inatomic(&value_fp, tail - 1, + sizeof(unsigned long))) + return NULL; + + if (check_vma_address(value_fp, stack_vma)) + return NULL; + + value_lr = value; + } + + fp_prev = (unsigned long __user *)value_fp; + + if (value_lr < QUADD_USER_SPACE_MIN_ADDR) + return NULL; + + quadd_callchain_store(callchain_data, value_lr); + + /* frames must move strictly upwards, otherwise the walk could loop */ + if (fp_prev <= tail) + return NULL; + + return fp_prev; +} + +unsigned int +quadd_get_user_callchain(struct pt_regs *regs, + struct quadd_callchain *callchain_data) 
+{ + unsigned long fp, sp, pc, reg; + struct vm_area_struct *vma, *vma_pc; + unsigned long __user *tail = NULL; + struct mm_struct *mm = current->mm; + + callchain_data->nr = 0; + + /* only ARM-mode user contexts with an mm are unwound */ + if (!regs || !user_mode(regs) || !mm) + return 0; + + if (thumb_mode(regs)) + return 0; + + fp = regs->ARM_fp; + sp = regs->ARM_sp; + pc = regs->ARM_pc; + + if (fp == 0 || fp < sp || fp & 0x3) + return 0; + + /* NOTE(review): find_vma() is called without visibly holding mmap_sem; confirm this is safe in the sampling context */ + vma = find_vma(mm, sp); + if (check_vma_address(fp, vma)) + return 0; + + /* read the word the frame pointer refers to */ + if (__copy_from_user_inatomic(&reg, (unsigned long __user *)fp, + sizeof(unsigned long))) + return 0; + + if (reg > fp && + !check_vma_address(reg, vma)) { + unsigned long value; + int read_lr = 0; + + if (!check_vma_address(fp + sizeof(unsigned long), vma)) { + if (__copy_from_user_inatomic( + &value, + (unsigned long __user *)fp + 1, + sizeof(unsigned long))) + return 0; + + vma_pc = find_vma(mm, pc); + read_lr = 1; + } + + /* value and vma_pc are only evaluated when read_lr is set */ + if (!read_lr || check_vma_address(value, vma_pc)) { + /* gcc: fp --> short frame tail (fp) */ + + if (regs->ARM_lr < QUADD_USER_SPACE_MIN_ADDR) + return 0; + + quadd_callchain_store(callchain_data, regs->ARM_lr); + tail = (unsigned long __user *)reg; + } + } + + if (!tail) + tail = (unsigned long __user *)fp; + + /* walk frame by frame while the pointer stays non-NULL and word aligned */ + while (tail && !((unsigned long)tail & 0x3)) + tail = user_backtrace(tail, callchain_data, vma); + + return callchain_data->nr; +} diff --git a/drivers/misc/tegra-profiler/backtrace.h b/drivers/misc/tegra-profiler/backtrace.h new file mode 100644 index 000000000000..82b55db496f0 --- /dev/null +++ b/drivers/misc/tegra-profiler/backtrace.h @@ -0,0 +1,34 @@ +/* + * drivers/misc/tegra-profiler/backtrace.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __QUADD_BACKTRACE_H +#define __QUADD_BACKTRACE_H + +#include <linux/types.h> + /* Capacity of quadd_callchain.callchain[]. */ +#define QUADD_MAX_STACK_DEPTH 64 + /* Fixed-capacity list of 32-bit call chain entries; nr is the number of valid entries in callchain[]. */ +struct quadd_callchain { + int nr; + u32 callchain[QUADD_MAX_STACK_DEPTH]; +}; + /* Fills callchain_data from the user context in regs. */ +unsigned int +quadd_get_user_callchain(struct pt_regs *regs, + struct quadd_callchain *callchain_data); + + +#endif /* __QUADD_BACKTRACE_H */ diff --git a/drivers/misc/tegra-profiler/comm.c b/drivers/misc/tegra-profiler/comm.c new file mode 100644 index 000000000000..36bfa671893e --- /dev/null +++ b/drivers/misc/tegra-profiler/comm.c @@ -0,0 +1,650 @@ +/* + * drivers/misc/tegra-profiler/comm.c + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/fs.h> +#include <asm/uaccess.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/miscdevice.h> +#include <linux/sched.h> + +#include <linux/tegra_profiler.h> + +#include "comm.h" +#include "version.h" + +#define QUADD_SIZE_RB_BUFFER (0x100000 * 8) /* 8 MB */ + +struct quadd_comm_ctx comm_ctx; + +static inline void *rb_alloc(unsigned long size) +{ + return vmalloc(size); +} + +static inline void rb_free(void *addr) +{ + vfree(addr); +} + +static void rb_reset(struct quadd_ring_buffer *rb) +{ + rb->pos_read = 0; + rb->pos_write = 0; + rb->fill_count = 0; +} + +static int rb_init(struct quadd_ring_buffer *rb, size_t size) +{ + spin_lock_init(&rb->lock); + + rb->size = size; + rb->buf = NULL; + + rb->buf = (char *) rb_alloc(rb->size); + if (!rb->buf) { + pr_err("Ring buffer alloc error\n"); + return 1; + } + pr_debug("data buffer size: %u\n", (unsigned int)rb->size); + + rb_reset(rb); + + return 0; +} + +static void rb_deinit(struct quadd_ring_buffer *rb) +{ + unsigned long flags; + + spin_lock_irqsave(&rb->lock, flags); + if (rb->buf) { + rb_reset(rb); + + rb_free(rb->buf); + rb->buf = NULL; + } + spin_unlock_irqrestore(&rb->lock, flags); +} + +static __attribute__((unused)) int rb_is_full(struct quadd_ring_buffer *rb) +{ + return rb->fill_count == rb->size; +} + +static int rb_is_empty(struct quadd_ring_buffer *rb) +{ + return rb->fill_count == 0; +} + +static size_t +rb_get_free_space(struct quadd_ring_buffer *rb) +{ + return rb->size - rb->fill_count; +} + +static size_t +rb_write(struct quadd_ring_buffer *rb, char *data, size_t length) +{ + size_t new_pos_write, chunk1; + + if (length > rb_get_free_space(rb)) + return 0; + + new_pos_write = (rb->pos_write + length) % rb->size; + + if (new_pos_write < rb->pos_write) { + chunk1 = rb->size - rb->pos_write; + memcpy(rb->buf + rb->pos_write, data, chunk1); + if (new_pos_write > 0) + memcpy(rb->buf, 
data + chunk1, new_pos_write); + } else { + memcpy(rb->buf + rb->pos_write, data, length); + } + + rb->pos_write = new_pos_write; + rb->fill_count += length; + + return length; +} + +static size_t rb_read_undo(struct quadd_ring_buffer *rb, size_t length) +{ + if (rb_get_free_space(rb) < length) + return 0; + + if (rb->pos_read > length) + rb->pos_read -= length; + else + rb->pos_read += rb->size - length; + + rb->fill_count += sizeof(struct quadd_record_data); + return length; +} + +static size_t rb_read(struct quadd_ring_buffer *rb, char *data, size_t length) +{ + unsigned int new_pos_read, chunk1; + + if (length > rb->fill_count) + return 0; + + new_pos_read = (rb->pos_read + length) % rb->size; + + if (new_pos_read < rb->pos_read) { + chunk1 = rb->size - rb->pos_read; + memcpy(data, rb->buf + rb->pos_read, chunk1); + if (new_pos_read > 0) + memcpy(data + chunk1, rb->buf, new_pos_read); + } else { + memcpy(data, rb->buf + rb->pos_read, length); + } + + rb->pos_read = new_pos_read; + rb->fill_count -= length; + + return length; +} + +static size_t +rb_read_user(struct quadd_ring_buffer *rb, char __user *data, size_t length) +{ + size_t new_pos_read, chunk1; + + if (length > rb->fill_count) + return 0; + + new_pos_read = (rb->pos_read + length) % rb->size; + + if (new_pos_read < rb->pos_read) { + chunk1 = rb->size - rb->pos_read; + if (copy_to_user(data, rb->buf + rb->pos_read, chunk1)) { + pr_err_once("Error: copy_to_user\n"); + return 0; + } + + if (new_pos_read > 0) { + if (copy_to_user(data + chunk1, rb->buf, + new_pos_read)) { + pr_err_once("Error: copy_to_user\n"); + return 0; + } + } + } else { + if (copy_to_user(data, rb->buf + rb->pos_read, length)) { + pr_err_once("Error: copy_to_user\n"); + return 0; + } + } + + rb->pos_read = new_pos_read; + rb->fill_count -= length; + + return length; +} + +static void +write_sample(struct quadd_record_data *sample, void *extra_data, + size_t extra_length) +{ + unsigned long flags; + struct quadd_ring_buffer *rb = 
&comm_ctx.rb; + int length_sample = sizeof(struct quadd_record_data) + extra_length; + + spin_lock_irqsave(&rb->lock, flags); + /* Record header and extra data are queued together or not at all, so check the combined size first. */ + if (length_sample > rb_get_free_space(rb)) { + pr_err_once("Error: Buffer overflowed, skip sample\n"); + spin_unlock_irqrestore(&rb->lock, flags); + return; + } + + if (!rb_write(rb, (char *)sample, sizeof(struct quadd_record_data))) { + spin_unlock_irqrestore(&rb->lock, flags); + return; + } + + if (extra_data && extra_length > 0) { + if (!rb_write(rb, extra_data, extra_length)) { + pr_err_once("Buffer overflowed, skip sample\n"); + spin_unlock_irqrestore(&rb->lock, flags); + return; + } + } + spin_unlock_irqrestore(&rb->lock, flags); +} + /* Move one record (fixed-size header plus its type-dependent extra data) from the ring buffer into the user buffer. Returns the number of bytes delivered, or 0 when nothing could be delivered. */ +static int read_sample(char __user *buffer, size_t max_length) +{ + unsigned long flags; + struct quadd_ring_buffer *rb = &comm_ctx.rb; + struct quadd_record_data record; + size_t length_extra = 0; + + spin_lock_irqsave(&rb->lock, flags); + + if (rb_is_empty(rb)) { + spin_unlock_irqrestore(&rb->lock, flags); + return 0; + } + + if (rb->fill_count < sizeof(struct quadd_record_data)) { + pr_err_once("Error: data\n"); + spin_unlock_irqrestore(&rb->lock, flags); + return 0; + } + /* The header is consumed here; only the "does not fit" path further below puts it back. */ + if (!rb_read(rb, (char *)&record, sizeof(struct quadd_record_data))) { + pr_err_once("Error: read sample\n"); + spin_unlock_irqrestore(&rb->lock, flags); + return 0; + } + + if (record.magic != QUADD_RECORD_MAGIC) { + pr_err_once("Bad magic: %#x\n", record.magic); + spin_unlock_irqrestore(&rb->lock, flags); + return 0; + } + /* The record type determines how many extra bytes follow the header. */ + switch (record.record_type) { + case QUADD_RECORD_TYPE_SAMPLE: + length_extra = record.sample.callchain_nr * + sizeof(record.sample.ip); + break; + + case QUADD_RECORD_TYPE_MMAP: + if (record.mmap.filename_length > 0) { + length_extra = record.mmap.filename_length; + } else { + length_extra = 0; + pr_err_once("Error: filename\n"); + } + break; + + case QUADD_RECORD_TYPE_DEBUG: + case QUADD_RECORD_TYPE_HEADER: + case QUADD_RECORD_TYPE_MA: + length_extra = 0; + break; + + case QUADD_RECORD_TYPE_POWER_RATE: 
+ length_extra = record.power_rate.nr_cpus * sizeof(u32); + break; + + case QUADD_RECORD_TYPE_ADDITIONAL_SAMPLE: + length_extra = record.additional_sample.extra_length; + break; + + default: + pr_err_once("Error: Unknown sample: %u\n", record.record_type); + spin_unlock_irqrestore(&rb->lock, flags); + return 0; + } + + if (sizeof(struct quadd_record_data) + length_extra > max_length) { + if (!rb_read_undo(rb, sizeof(struct quadd_record_data))) + pr_err_once("Error: rb_read_undo\n"); + spin_unlock_irqrestore(&rb->lock, flags); + return 0; + } + + if (length_extra > rb_get_free_space(rb)) { + pr_err_once("Error: Incompleted sample\n"); + spin_unlock_irqrestore(&rb->lock, flags); + return 0; + } + + if (copy_to_user(buffer, &record, sizeof(struct quadd_record_data))) { + pr_err_once("Error: copy_to_user\n"); + spin_unlock_irqrestore(&rb->lock, flags); + return 0; + } + + if (length_extra > 0) { + if (!rb_read_user(rb, buffer + sizeof(struct quadd_record_data), + length_extra)) { + pr_err_once("Error: copy_to_user\n"); + spin_unlock_irqrestore(&rb->lock, flags); + return 0; + } + } + + spin_unlock_irqrestore(&rb->lock, flags); + return sizeof(struct quadd_record_data) + length_extra; +} + +static void put_sample(struct quadd_record_data *data, char *extra_data, + unsigned int extra_length) +{ + if (!atomic_read(&comm_ctx.active)) + return; + + write_sample(data, extra_data, extra_length); +} + +static void comm_reset(void) +{ + unsigned long flags; + + pr_debug("Comm reset\n"); + spin_lock_irqsave(&comm_ctx.rb.lock, flags); + rb_reset(&comm_ctx.rb); + spin_unlock_irqrestore(&comm_ctx.rb.lock, flags); +} + +static struct quadd_comm_data_interface comm_data = { + .put_sample = put_sample, + .reset = comm_reset, +}; + +static int check_access_permission(void) +{ + struct task_struct *task; + + if (capable(CAP_SYS_ADMIN)) + return 0; + + if (!comm_ctx.params_ok || comm_ctx.process_pid == 0) + return -EACCES; + + rcu_read_lock(); + task = 
pid_task(find_vpid(comm_ctx.process_pid), PIDTYPE_PID); + /* + * The task returned by pid_task() is only guaranteed to stay valid + * inside the RCU read-side section, so every use of it happens + * before rcu_read_unlock(). + */ + if (!task) { + rcu_read_unlock(); + return -EACCES; + } + + if (current_fsuid() != task_uid(task) && + task_uid(task) != comm_ctx.debug_app_uid) { + pr_err("Permission denied, owner/task uids: %u/%u\n", + current_fsuid(), task_uid(task)); + rcu_read_unlock(); + return -EACCES; + } + rcu_read_unlock(); + return 0; +} + +/* Count the openers so that the last close can stop profiling. */ +static int device_open(struct inode *inode, struct file *file) +{ + mutex_lock(&comm_ctx.io_mutex); + comm_ctx.nr_users++; + mutex_unlock(&comm_ctx.io_mutex); + return 0; +} + +/* Stop a still running profiling session when the last user closes the device. */ +static int device_release(struct inode *inode, struct file *file) +{ + mutex_lock(&comm_ctx.io_mutex); + comm_ctx.nr_users--; + + if (comm_ctx.nr_users == 0) { + if (atomic_cmpxchg(&comm_ctx.active, 1, 0)) { + comm_ctx.control->stop(); + pr_info("Stop profiling: daemon is closed\n"); + } + } + mutex_unlock(&comm_ctx.io_mutex); + + return 0; +} + +/* + * Deliver as many complete records as fit into the user buffer. + * Returns the number of bytes copied, a negative value from the permission + * check, or -1 while profiling is inactive. + */ +static ssize_t +device_read(struct file *filp, + char __user *buffer, + size_t length, + loff_t *offset) +{ + int err; + size_t was_read = 0, res, samples_counter = 0; + + err = check_access_permission(); + if (err) + return err; + + mutex_lock(&comm_ctx.io_mutex); + + if (!atomic_read(&comm_ctx.active)) { + mutex_unlock(&comm_ctx.io_mutex); + /* NOTE(review): -1 is numerically -EPERM; confirm what user space expects here */ + return -1; + } + + while (was_read + sizeof(struct quadd_record_data) < length) { + res = read_sample(buffer + was_read, length - was_read); + if (res == 0) + break; + + was_read += res; + samples_counter++; + + if (!atomic_read(&comm_ctx.active)) + break; + } + + mutex_unlock(&comm_ctx.io_mutex); + return was_read; +} + +static long +device_ioctl(struct file *file, + unsigned int ioctl_num, + unsigned long ioctl_param) +{ + int err; + struct quadd_parameters user_params; + struct quadd_comm_cap cap; + struct quadd_module_state state; + struct quadd_module_version versions; + unsigned long flags; + struct quadd_ring_buffer *rb = &comm_ctx.rb; + + if (ioctl_num != IOCTL_SETUP && + ioctl_num != IOCTL_GET_CAP && + ioctl_num != IOCTL_GET_STATE && + ioctl_num != IOCTL_GET_VERSION) { + err = 
check_access_permission(); + if (err) + return err; + } + + mutex_lock(&comm_ctx.io_mutex); + + switch (ioctl_num) { + case IOCTL_SETUP: + if (atomic_read(&comm_ctx.active)) { + pr_err("error: tegra profiler is active\n"); + mutex_unlock(&comm_ctx.io_mutex); + return -EBUSY; + } + + if (copy_from_user(&user_params, (void __user *)ioctl_param, + sizeof(struct quadd_parameters))) { + pr_err("setup failed\n"); + mutex_unlock(&comm_ctx.io_mutex); + return -EFAULT; + } + + err = comm_ctx.control->set_parameters(&user_params, + &comm_ctx.debug_app_uid); + if (err) { + pr_err("error: setup failed\n"); + mutex_unlock(&comm_ctx.io_mutex); + return err; + } + comm_ctx.params_ok = 1; + comm_ctx.process_pid = user_params.pids[0]; + + pr_info("setup success: freq/mafreq: %u/%u, backtrace: %d, pid: %d\n", + user_params.freq, + user_params.ma_freq, + user_params.backtrace, + user_params.pids[0]); + break; + + case IOCTL_GET_CAP: + comm_ctx.control->get_capabilities(&cap); + if (copy_to_user((void __user *)ioctl_param, &cap, + sizeof(struct quadd_comm_cap))) { + pr_err("error: get_capabilities failed\n"); + mutex_unlock(&comm_ctx.io_mutex); + return -EFAULT; + } + break; + + case IOCTL_GET_VERSION: + strcpy(versions.branch, QUADD_MODULE_BRANCH); + strcpy(versions.version, QUADD_MODULE_VERSION); + + versions.samples_version = QUADD_SAMPLES_VERSION; + versions.io_version = QUADD_IO_VERSION; + + if (copy_to_user((void __user *)ioctl_param, &versions, + sizeof(struct quadd_module_version))) { + pr_err("error: get version failed\n"); + mutex_unlock(&comm_ctx.io_mutex); + return -EFAULT; + } + break; + + case IOCTL_GET_STATE: + comm_ctx.control->get_state(&state); + + state.buffer_size = QUADD_SIZE_RB_BUFFER; + + spin_lock_irqsave(&rb->lock, flags); + state.buffer_fill_size = + QUADD_SIZE_RB_BUFFER - rb_get_free_space(rb); + spin_unlock_irqrestore(&rb->lock, flags); + + if (copy_to_user((void __user *)ioctl_param, &state, + sizeof(struct quadd_module_state))) { + pr_err("error: 
get_state failed\n"); + mutex_unlock(&comm_ctx.io_mutex); + return -EFAULT; + } + break; + + case IOCTL_START: + if (!atomic_cmpxchg(&comm_ctx.active, 0, 1)) { + if (!comm_ctx.params_ok) { + pr_err("error: params failed\n"); + atomic_set(&comm_ctx.active, 0); + mutex_unlock(&comm_ctx.io_mutex); + return -EFAULT; + } + + if (comm_ctx.control->start()) { + pr_err("error: start failed\n"); + atomic_set(&comm_ctx.active, 0); + mutex_unlock(&comm_ctx.io_mutex); + return -EFAULT; + } + pr_info("Start profiling success\n"); + } + break; + + case IOCTL_STOP: + if (atomic_cmpxchg(&comm_ctx.active, 1, 0)) { + comm_ctx.control->stop(); + pr_info("Stop profiling success\n"); + } + break; + + default: + pr_err("error: ioctl %u is unsupported in this version of module\n", + ioctl_num); + mutex_unlock(&comm_ctx.io_mutex); + return -EFAULT; + } + mutex_unlock(&comm_ctx.io_mutex); + + return 0; +} + +static void unregister(void) +{ + misc_deregister(comm_ctx.misc_dev); + kfree(comm_ctx.misc_dev); +} + +static void free_ctx(void) +{ + rb_deinit(&comm_ctx.rb); +} + +static const struct file_operations qm_fops = { + .read = device_read, + .open = device_open, + .release = device_release, + .unlocked_ioctl = device_ioctl +}; + +static int comm_init(void) +{ + int res; + struct miscdevice *misc_dev; + struct quadd_ring_buffer *rb = &comm_ctx.rb; + + misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL); + if (!misc_dev) { + pr_err("Error: alloc error\n"); + return -ENOMEM; + } + + misc_dev->minor = MISC_DYNAMIC_MINOR; + misc_dev->name = QUADD_DEVICE_NAME; + misc_dev->fops = &qm_fops; + + res = misc_register(misc_dev); + if (res < 0) { + pr_err("Error: misc_register %d\n", res); + return res; + } + comm_ctx.misc_dev = misc_dev; + + mutex_init(&comm_ctx.io_mutex); + atomic_set(&comm_ctx.active, 0); + + comm_ctx.params_ok = 0; + comm_ctx.process_pid = 0; + comm_ctx.nr_users = 0; + + if (rb_init(rb, QUADD_SIZE_RB_BUFFER)) { + free_ctx(); + unregister(); + return -ENOMEM; + } + + return 0; +} + 
+/* + * Bring up the communication layer. + * + * The control callbacks are stored before comm_init() registers the + * device, because the file operations call through comm_ctx.control as + * soon as the device can be opened. + * + * Returns the data interface, or NULL when initialisation fails. + */ +struct quadd_comm_data_interface * +quadd_comm_events_init(struct quadd_comm_control_interface *control) +{ + comm_ctx.control = control; + + if (comm_init() < 0) { + comm_ctx.control = NULL; + return NULL; + } + + return &comm_data; +} + +/* Unregister the device and release the ring buffer. */ +void quadd_comm_events_exit(void) +{ + mutex_lock(&comm_ctx.io_mutex); + unregister(); + free_ctx(); + mutex_unlock(&comm_ctx.io_mutex); +} diff --git a/drivers/misc/tegra-profiler/comm.h b/drivers/misc/tegra-profiler/comm.h new file mode 100644 index 000000000000..da8e918da38b --- /dev/null +++ b/drivers/misc/tegra-profiler/comm.h @@ -0,0 +1,74 @@ +/* + * drivers/misc/tegra-profiler/comm.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#ifndef __QUADD_COMM_H__ +#define __QUADD_COMM_H__ + +#include <linux/types.h> + +struct quadd_record_data; +struct quadd_comm_cap; +struct quadd_module_state; +struct miscdevice; + /* Byte ring buffer that carries records from the profiler to the reader. */ +struct quadd_ring_buffer { + char *buf; /* storage of 'size' bytes */ + spinlock_t lock; /* taken by comm.c around accesses to the fields below */ + + size_t size; /* capacity in bytes */ + size_t pos_read; /* byte offset of the next read */ + size_t pos_write; /* byte offset of the next write */ + size_t fill_count; /* bytes currently stored */ +}; + +struct quadd_parameters; + /* Callbacks the communication layer invokes on behalf of the device ioctls. */ +struct quadd_comm_control_interface { + int (*start)(void); + void (*stop)(void); + int (*set_parameters)(struct quadd_parameters *param, + uid_t *debug_app_uid); + void (*get_capabilities)(struct quadd_comm_cap *cap); + void (*get_state)(struct quadd_module_state *state); +}; + /* Interface handed back to the profiler core for queueing records and resetting the buffer. */ +struct quadd_comm_data_interface { + void (*put_sample)(struct quadd_record_data *data, char *extra_data, + unsigned int extra_length); + void (*reset)(void); +}; + /* State of the communication layer. */ +struct quadd_comm_ctx { + struct quadd_comm_control_interface *control; + struct quadd_ring_buffer rb; + + atomic_t active; /* non-zero while a profiling session is running */ + + struct mutex io_mutex; /* serialises the device file operations */ + int nr_users; /* number of open file handles */ + + int params_ok; /* set once IOCTL_SETUP succeeded */ + pid_t process_pid; /* first pid of the accepted parameters */ + uid_t debug_app_uid; /* filled in by set_parameters() */ + + struct miscdevice *misc_dev; +}; + +struct quadd_comm_data_interface * +quadd_comm_events_init(struct quadd_comm_control_interface *control); +void quadd_comm_events_exit(void); + +#endif /* __QUADD_COMM_H__ */ diff --git a/drivers/misc/tegra-profiler/debug.c b/drivers/misc/tegra-profiler/debug.c new file mode 100644 index 000000000000..e0270a310ae4 --- /dev/null +++ b/drivers/misc/tegra-profiler/debug.c @@ -0,0 +1,164 @@ +/* + * drivers/misc/tegra-profiler/debug.c + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + */ + +#include <linux/module.h> +#include <asm/irq_regs.h> + +#include <linux/tegra_profiler.h> + +#include "debug.h" +#include "hrt.h" +#include "tegra.h" + +#ifdef QM_DEBUG_SAMPLES_ENABLE + /* Fill the common part of a debug record: magic, type, cpu mode (taken from regs, falling back to get_irq_regs()), cpu and time stamp; the remaining debug fields are cleared. s->type is left for the caller to set. */ +static inline void +init_sample(struct quadd_record_data *record, struct pt_regs *regs) +{ + struct quadd_debug_data *s = &record->debug; + + record->magic = QUADD_RECORD_MAGIC; + record->record_type = QUADD_RECORD_TYPE_DEBUG; + + if (!regs) + regs = get_irq_regs(); + + if (!regs) + record->cpu_mode = QUADD_CPU_MODE_NONE; + else + record->cpu_mode = user_mode(regs) ? + QUADD_CPU_MODE_USER : QUADD_CPU_MODE_KERNEL; + + s->cpu = quadd_get_processor_id(); + s->pid = 0; + s->time = quadd_get_time(); + s->timer_period = 0; + + s->extra_value1 = 0; + s->extra_value2 = 0; + s->extra_value3 = 0; +} + /* Emit a TIMER_HANDLE debug record. */ +void qm_debug_handler_sample(struct pt_regs *regs) +{ + struct quadd_record_data record; + struct quadd_debug_data *s = &record.debug; + + init_sample(&record, regs); + + s->type = QM_DEBUG_SAMPLE_TYPE_TIMER_HANDLE; + + quadd_put_sample(&record, NULL, 0); +} + /* Emit a TIMER_FORWARD debug record carrying the timer period. */ +void qm_debug_timer_forward(struct pt_regs *regs, u64 period) +{ + struct quadd_record_data record; + struct quadd_debug_data *s = &record.debug; + + init_sample(&record, regs); + + s->type = QM_DEBUG_SAMPLE_TYPE_TIMER_FORWARD; + s->timer_period = period; + + quadd_put_sample(&record, NULL, 0); +} + /* Emit a TIMER_START debug record carrying the timer period. */ +void qm_debug_timer_start(struct pt_regs *regs, u64 period) +{ + struct quadd_record_data record; + struct quadd_debug_data *s = &record.debug; + + init_sample(&record, regs); + + s->type = QM_DEBUG_SAMPLE_TYPE_TIMER_START; + s->timer_period = period; + + quadd_put_sample(&record, NULL, 0); +} + /* Emit a TIMER_CANCEL debug record. */ +void qm_debug_timer_cancel(void) +{ + struct quadd_record_data record; + struct quadd_debug_data *s = &record.debug; + + init_sample(&record, NULL); + + s->type = QM_DEBUG_SAMPLE_TYPE_TIMER_CANCEL; + + quadd_put_sample(&record, NULL, 0); +} + +void +qm_debug_task_sched_in(pid_t 
prev_pid, pid_t current_pid, int prev_nr_active) +{ + struct quadd_record_data record; + struct quadd_debug_data *s = &record.debug; + + init_sample(&record, NULL); + + s->type = QM_DEBUG_SAMPLE_TYPE_SCHED_IN; + /* the three extra values carry prev_pid, current_pid and prev_nr_active */ + s->extra_value1 = prev_pid; + s->extra_value2 = current_pid; + s->extra_value3 = prev_nr_active; + + quadd_put_sample(&record, NULL, 0); +} + /* Emit a READ_COUNTER debug record: event id, previous value, current value. */ +void qm_debug_read_counter(int event_id, u32 prev_val, u32 val) +{ + struct quadd_record_data record; + struct quadd_debug_data *s = &record.debug; + + init_sample(&record, NULL); + + s->type = QM_DEBUG_SAMPLE_TYPE_READ_COUNTER; + + s->extra_value1 = event_id; + s->extra_value2 = prev_val; + s->extra_value3 = val; + + quadd_put_sample(&record, NULL, 0); +} + /* Emit a SOURCE_START debug record for source_type. */ +void qm_debug_start_source(int source_type) +{ + struct quadd_record_data record; + struct quadd_debug_data *s = &record.debug; + + init_sample(&record, NULL); + + s->type = QM_DEBUG_SAMPLE_TYPE_SOURCE_START; + s->extra_value1 = source_type; + + quadd_put_sample(&record, NULL, 0); +} + /* Emit a SOURCE_STOP debug record for source_type. */ +void qm_debug_stop_source(int source_type) +{ + struct quadd_record_data record; + struct quadd_debug_data *s = &record.debug; + + init_sample(&record, NULL); + + s->type = QM_DEBUG_SAMPLE_TYPE_SOURCE_STOP; + s->extra_value1 = source_type; + + quadd_put_sample(&record, NULL, 0); +} + +#endif /* QM_DEBUG_SAMPLES_ENABLE */ diff --git a/drivers/misc/tegra-profiler/debug.h b/drivers/misc/tegra-profiler/debug.h new file mode 100644 index 000000000000..ff62919eb243 --- /dev/null +++ b/drivers/misc/tegra-profiler/debug.h @@ -0,0 +1,87 @@ +/* + * drivers/misc/tegra-profiler/debug.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __QUADD_DEBUG_H +#define __QUADD_DEBUG_H + +#include <linux/tegra_profiler.h> + /* Uncomment to build the debug-record helpers; otherwise they are empty inline stubs. */ +/* #define QM_DEBUG_SAMPLES_ENABLE 1 */ + +#ifdef QM_DEBUG_SAMPLES_ENABLE +void qm_debug_handler_sample(struct pt_regs *regs); +void qm_debug_timer_forward(struct pt_regs *regs, u64 period); +void qm_debug_timer_start(struct pt_regs *regs, u64 period); +void qm_debug_timer_cancel(void); +void qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, + int prev_nr_active); +void qm_debug_read_counter(int event_id, u32 prev_val, u32 val); +void qm_debug_start_source(int source_type); +void qm_debug_stop_source(int source_type); +#else /* no-op stubs so that callers need no #ifdef of their own */ +static inline void qm_debug_handler_sample(struct pt_regs *regs) +{ +} +static inline void qm_debug_timer_forward(struct pt_regs *regs, u64 period) +{ +} +static inline void qm_debug_timer_start(struct pt_regs *regs, u64 period) +{ +} +static inline void qm_debug_timer_cancel(void) +{ +} +static inline void +qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, int prev_nr_active) +{ +} +static inline void qm_debug_read_counter(int event_id, u32 prev_val, u32 val) +{ +} +static inline void qm_debug_start_source(int source_type) +{ +} +static inline void qm_debug_stop_source(int source_type) +{ +} +#endif + +void quadd_test_delay(void); + /* Number of elements of a true array (not valid for pointers). */ +#define QM_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) /* Map an event type to a printable name via a table indexed by QUADD_EVENT_TYPE_*. NOTE(review): table slots without an initialiser are NULL; confirm the enum has no gaps. */ +static inline char * +quadd_get_event_str(int event) +{ + static char *str[] = { + [QUADD_EVENT_TYPE_CPU_CYCLES] = "cpu-cycles", + + [QUADD_EVENT_TYPE_INSTRUCTIONS] = "instructions", + [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] = "branch_instruction", + [QUADD_EVENT_TYPE_BRANCH_MISSES] = "branch_misses", + [QUADD_EVENT_TYPE_BUS_CYCLES] = "bus-cycles", + + [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] = 
"l1_d_read", + [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] = "l1_d_write", + [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] = "l1_i", + + [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] = "l2_d_read", + [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] = "l2_d_write", + [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] = "l2_i", + }; + return (event < QM_ARRAY_SIZE(str)) ? str[event] : "invalid event"; +} + +#endif /* __QUADD_DEBUG_H */ diff --git a/drivers/misc/tegra-profiler/hrt.c b/drivers/misc/tegra-profiler/hrt.c new file mode 100644 index 000000000000..56d8b84ae75d --- /dev/null +++ b/drivers/misc/tegra-profiler/hrt.c @@ -0,0 +1,620 @@ +/* + * drivers/misc/tegra-profiler/hrt.c + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/sched.h> +#include <asm/cputype.h> +#include <linux/hrtimer.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/ratelimit.h> +#include <asm/irq_regs.h> + +#include <linux/tegra_profiler.h> + +#include "quadd.h" +#include "hrt.h" +#include "comm.h" +#include "mmap.h" +#include "ma.h" +#include "power_clk.h" +#include "tegra.h" +#include "debug.h" + +static struct quadd_hrt_ctx hrt; + +static void read_all_sources(struct pt_regs *regs, pid_t pid); + +static void sample_time_prepare(void); +static void sample_time_finish(void); +static void sample_time_reset(struct quadd_cpu_context *cpu_ctx); + /* Sampling timer callback: while hrt.active is set, reads all sources (only when interrupt registers are available) and re-arms the timer one sample period ahead; returns HRTIMER_NORESTART once hrt.active is 0. */ +static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer) +{ + struct pt_regs *regs; + + regs = get_irq_regs(); + + if (hrt.active == 0) + return HRTIMER_NORESTART; + + qm_debug_handler_sample(regs); + + if (regs) { + sample_time_prepare(); + read_all_sources(regs, -1); + sample_time_finish(); + } + + hrtimer_forward_now(hrtimer, ns_to_ktime(hrt.sample_period)); + qm_debug_timer_forward(regs, hrt.sample_period); + + return HRTIMER_RESTART; +} + /* Arm the timer of cpu_ctx with the current sample period (relative, pinned mode). */ +static void start_hrtimer(struct quadd_cpu_context *cpu_ctx) +{ + u64 period = hrt.sample_period; + + sample_time_reset(cpu_ctx); + + hrtimer_start(&cpu_ctx->hrtimer, ns_to_ktime(period), + HRTIMER_MODE_REL_PINNED); + qm_debug_timer_start(NULL, period); +} + /* Cancel the timer of cpu_ctx. */ +static void cancel_hrtimer(struct quadd_cpu_context *cpu_ctx) +{ + hrtimer_cancel(&cpu_ctx->hrtimer); + qm_debug_timer_cancel(); +} + /* Prepare the timer of cpu_ctx (monotonic clock, relative mode) and install hrtimer_handler as its callback. */ +static void init_hrtimer(struct quadd_cpu_context *cpu_ctx) +{ + sample_time_reset(cpu_ctx); + + hrtimer_init(&cpu_ctx->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cpu_ctx->hrtimer.function = hrtimer_handler; +} + /* Current monotonic time in nanoseconds. */ +u64 quadd_get_time(void) +{ + struct timespec ts; + + do_posix_clock_monotonic_gettime(&ts); + return timespec_to_ns(&ts); +} + /* Time stamp for the sample being taken: the current time, or the per-cpu computed time when QUADD_USE_CORRECT_SAMPLE_TS is defined. */ +static u64 get_sample_time(void) +{ +#ifndef 
QUADD_USE_CORRECT_SAMPLE_TS + return quadd_get_time(); +#else + struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx); + return cpu_ctx->current_time; +#endif +} + +static void sample_time_prepare(void) +{ +#ifdef QUADD_USE_CORRECT_SAMPLE_TS + struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx); + + if (cpu_ctx->prev_time == ULLONG_MAX) + cpu_ctx->current_time = quadd_get_time(); + else + cpu_ctx->current_time = cpu_ctx->prev_time + hrt.sample_period; +#endif +} + +static void sample_time_finish(void) +{ +#ifdef QUADD_USE_CORRECT_SAMPLE_TS + struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx); + cpu_ctx->prev_time = cpu_ctx->current_time; +#endif +} + +static void sample_time_reset(struct quadd_cpu_context *cpu_ctx) +{ +#ifdef QUADD_USE_CORRECT_SAMPLE_TS + cpu_ctx->prev_time = ULLONG_MAX; + cpu_ctx->current_time = ULLONG_MAX; +#endif +} + +static void put_header(void) +{ + int power_rate_period; + struct quadd_record_data record; + struct quadd_header_data *hdr = &record.hdr; + struct quadd_parameters *param = &hrt.quadd_ctx->param; + struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm; + + record.magic = QUADD_RECORD_MAGIC; + record.record_type = QUADD_RECORD_TYPE_HEADER; + record.cpu_mode = QUADD_CPU_MODE_NONE; + + hdr->version = QUADD_SAMPLES_VERSION; + + hdr->backtrace = param->backtrace; + hdr->use_freq = param->use_freq; + hdr->system_wide = param->system_wide; + + /* TODO: dynamically */ +#ifdef QM_DEBUG_SAMPLES_ENABLE + hdr->debug_samples = 1; +#else + hdr->debug_samples = 0; +#endif + + hdr->period = hrt.sample_period; + hdr->ma_period = hrt.ma_period; + + hdr->power_rate = quadd_power_clk_is_enabled(&power_rate_period); + hdr->power_rate_period = power_rate_period; + + comm->put_sample(&record, NULL, 0); +} + +void quadd_put_sample(struct quadd_record_data *data, + char *extra_data, unsigned int extra_length) +{ + struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm; + + if (data->record_type == 
QUADD_RECORD_TYPE_SAMPLE && + data->sample.period > 0x7FFFFFFF) { + struct quadd_sample_data *sample = &data->sample; + pr_err_once("very big period, sample id: %d\n", + sample->event_id); + return; + } + + comm->put_sample(data, extra_data, extra_length); + atomic64_inc(&hrt.counter_samples); +} + +static int get_sample_data(struct event_data *event, + struct pt_regs *regs, + struct quadd_sample_data *sample) +{ + u32 period; + u32 prev_val, val; + + prev_val = event->prev_val; + val = event->val; + + sample->event_id = event->event_id; + + sample->ip = instruction_pointer(regs); + sample->cpu = quadd_get_processor_id(); + sample->time = get_sample_time(); + + if (prev_val <= val) + period = val - prev_val; + else + period = QUADD_U32_MAX - prev_val + val; + + if (event->event_source == QUADD_EVENT_SOURCE_PL310) { + int nr_current_active = atomic_read(&hrt.nr_active_all_core); + if (nr_current_active > 1) + period = period / nr_current_active; + } + + sample->period = period; + return 0; +} + +static char *get_mmap_data(struct pt_regs *regs, + struct quadd_mmap_data *sample, + unsigned int *extra_length) +{ + struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx); + return quadd_get_mmap(cpu_ctx, regs, sample, extra_length); +} + +static void read_source(struct quadd_event_source_interface *source, + struct pt_regs *regs, pid_t pid) +{ + int nr_events, i; + struct event_data events[QUADD_MAX_COUNTERS]; + struct quadd_record_data record_data; + struct quadd_thread_data *t_data; + char *extra_data = NULL; + unsigned int extra_length = 0, callchain_nr = 0; + struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx); + struct quadd_callchain *callchain_data = &cpu_ctx->callchain_data; + + if (!source) + return; + + nr_events = source->read(events); + + if (nr_events == 0 || nr_events > QUADD_MAX_COUNTERS) { + pr_err_once("Error number of counters: %d, source: %p\n", + nr_events, source); + return; + } + + if (user_mode(regs) && 
hrt.quadd_ctx->param.backtrace) { + callchain_nr = quadd_get_user_callchain(regs, callchain_data); + if (callchain_nr > 0) { + extra_data = (char *)cpu_ctx->callchain_data.callchain; + extra_length = callchain_nr * sizeof(u32); + } + } + + for (i = 0; i < nr_events; i++) { + if (get_sample_data(&events[i], regs, &record_data.sample)) + return; + + record_data.magic = QUADD_RECORD_MAGIC; + record_data.record_type = QUADD_RECORD_TYPE_SAMPLE; + record_data.cpu_mode = user_mode(regs) ? + QUADD_CPU_MODE_USER : QUADD_CPU_MODE_KERNEL; + + record_data.sample.callchain_nr = callchain_nr; + + if (pid > 0) { + record_data.sample.pid = pid; + quadd_put_sample(&record_data, extra_data, + extra_length); + } else { + t_data = &cpu_ctx->active_thread; + + if (atomic_read(&cpu_ctx->nr_active) > 0) { + record_data.sample.pid = t_data->pid; + quadd_put_sample(&record_data, extra_data, + extra_length); + } + } + } +} + +static void read_all_sources(struct pt_regs *regs, pid_t pid) +{ + struct quadd_record_data record_data; + struct quadd_ctx *ctx = hrt.quadd_ctx; + unsigned int extra_length; + char *extra_data; + + if (!regs) + return; + + extra_data = get_mmap_data(regs, &record_data.mmap, &extra_length); + if (extra_data && extra_length > 0) { + record_data.magic = QUADD_RECORD_MAGIC; + record_data.record_type = QUADD_RECORD_TYPE_MMAP; + record_data.cpu_mode = QUADD_CPU_MODE_USER; + + record_data.mmap.filename_length = extra_length; + record_data.mmap.pid = pid > 0 ? 
pid : ctx->param.pids[0]; + + quadd_put_sample(&record_data, extra_data, extra_length); + } else { + record_data.mmap.filename_length = 0; + } + + if (ctx->pmu && ctx->pmu_info.active) + read_source(ctx->pmu, regs, pid); + + if (ctx->pl310 && ctx->pl310_info.active) + read_source(ctx->pl310, regs, pid); +} + +static inline int is_profile_process(pid_t pid) +{ + int i; + pid_t profile_pid; + struct quadd_ctx *ctx = hrt.quadd_ctx; + + for (i = 0; i < ctx->param.nr_pids; i++) { + profile_pid = ctx->param.pids[i]; + if (profile_pid == pid) + return 1; + } + return 0; +} + +static int +add_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid, pid_t tgid) +{ + struct quadd_thread_data *t_data = &cpu_ctx->active_thread; + + if (t_data->pid > 0 || + atomic_read(&cpu_ctx->nr_active) > 0) { + pr_warn_once("Warning for thread: %d\n", (int)pid); + return 0; + } + + t_data->pid = pid; + t_data->tgid = tgid; + return 1; +} + +static int remove_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid) +{ + struct quadd_thread_data *t_data = &cpu_ctx->active_thread; + + if (t_data->pid < 0) + return 0; + + if (t_data->pid == pid) { + t_data->pid = -1; + t_data->tgid = -1; + return 1; + } + + pr_warn_once("Warning for thread: %d\n", (int)pid); + return 0; +} + +static int task_sched_in(struct kprobe *kp, struct pt_regs *regs) +{ + int n, prev_flag, current_flag; + struct task_struct *prev, *task; + int prev_nr_active, new_nr_active; + struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx); + struct quadd_ctx *ctx = hrt.quadd_ctx; + struct event_data events[QUADD_MAX_COUNTERS]; + /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */ + + if (hrt.active == 0) + return 0; + + prev = (struct task_struct *)regs->ARM_r1; + task = current; +/* + if (__ratelimit(&ratelimit_state)) + pr_info("cpu: %d, prev: %u (%u) \t--> curr: %u (%u)\n", + quadd_get_processor_id(), (unsigned int)prev->pid, + (unsigned int)prev->tgid, (unsigned int)task->pid, + (unsigned 
int)task->tgid); +*/ + if (!prev || !prev->real_parent || !prev->group_leader || + prev->group_leader->tgid != prev->tgid) { + pr_err_once("Warning\n"); + return 0; + } + + prev_flag = is_profile_process(prev->tgid); + current_flag = is_profile_process(task->tgid); + + if (prev_flag || current_flag) { + prev_nr_active = atomic_read(&cpu_ctx->nr_active); + qm_debug_task_sched_in(prev->pid, task->pid, prev_nr_active); + + if (prev_flag) { + n = remove_active_thread(cpu_ctx, prev->pid); + atomic_sub(n, &cpu_ctx->nr_active); + } + if (current_flag) { + add_active_thread(cpu_ctx, task->pid, task->tgid); + atomic_inc(&cpu_ctx->nr_active); + } + + new_nr_active = atomic_read(&cpu_ctx->nr_active); + if (prev_nr_active != new_nr_active) { + if (prev_nr_active == 0) { + if (ctx->pmu) + ctx->pmu->start(); + + if (ctx->pl310) + ctx->pl310->read(events); + + start_hrtimer(cpu_ctx); + atomic_inc(&hrt.nr_active_all_core); + } else if (new_nr_active == 0) { + cancel_hrtimer(cpu_ctx); + atomic_dec(&hrt.nr_active_all_core); + + if (ctx->pmu) + ctx->pmu->stop(); + } + } + } + + return 0; +} + +static int handler_fault(struct kprobe *kp, struct pt_regs *regs, int trapnr) +{ + pr_err_once("addr: %p, symbol: %s\n", kp->addr, kp->symbol_name); + return 0; +} + +static int start_instr(void) +{ + int err; + + memset(&hrt.kp_in, 0, sizeof(struct kprobe)); + + hrt.kp_in.pre_handler = task_sched_in; + hrt.kp_in.fault_handler = handler_fault; + hrt.kp_in.addr = 0; + hrt.kp_in.symbol_name = QUADD_HRT_SCHED_IN_FUNC; + + err = register_kprobe(&hrt.kp_in); + if (err) { + pr_err("register_kprobe error, symbol_name: %s\n", + hrt.kp_in.symbol_name); + return err; + } + return 0; +} + +static void stop_instr(void) +{ + unregister_kprobe(&hrt.kp_in); +} + +static int init_instr(void) +{ + int err; + + err = start_instr(); + if (err) { + pr_err("Init instr failed\n"); + return err; + } + stop_instr(); + return 0; +} + +static int deinit_instr(void) +{ + return 0; +} + +static void reset_cpu_ctx(void) +{ 
+ int cpu_id; + struct quadd_cpu_context *cpu_ctx; + struct quadd_thread_data *t_data; + + for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) { + cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id); + t_data = &cpu_ctx->active_thread; + + atomic_set(&cpu_ctx->nr_active, 0); + + t_data->pid = -1; + t_data->tgid = -1; + + sample_time_reset(cpu_ctx); + } +} + +int quadd_hrt_start(void) +{ + int err; + u64 period; + long freq; + struct quadd_ctx *ctx = hrt.quadd_ctx; + + freq = ctx->param.freq; + freq = max_t(long, QUADD_HRT_MIN_FREQ, freq); + period = NSEC_PER_SEC / freq; + hrt.sample_period = period; + + if (ctx->param.ma_freq > 0) + hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq; + else + hrt.ma_period = 0; + + atomic64_set(&hrt.counter_samples, 0); + + reset_cpu_ctx(); + + err = start_instr(); + if (err) { + pr_err("error: start_instr is failed\n"); + return err; + } + + put_header(); + + if (ctx->pl310) + ctx->pl310->start(); + + quadd_ma_start(&hrt); + + hrt.active = 1; + + pr_info("Start hrt: freq/period: %ld/%llu\n", freq, period); + return 0; +} + +void quadd_hrt_stop(void) +{ + struct quadd_ctx *ctx = hrt.quadd_ctx; + + pr_info("Stop hrt, number of samples: %llu\n", + atomic64_read(&hrt.counter_samples)); + + if (ctx->pl310) + ctx->pl310->stop(); + + quadd_ma_stop(&hrt); + + hrt.active = 0; + stop_instr(); + + atomic64_set(&hrt.counter_samples, 0); + + /* reset_cpu_ctx(); */ +} + +void quadd_hrt_deinit(void) +{ + if (hrt.active) + quadd_hrt_stop(); + + deinit_instr(); + free_percpu(hrt.cpu_ctx); +} + +void quadd_hrt_get_state(struct quadd_module_state *state) +{ + state->nr_all_samples = atomic64_read(&hrt.counter_samples); + state->nr_skipped_samples = 0; +} + +struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx) +{ + int cpu_id; + u64 period; + long freq; + struct quadd_cpu_context *cpu_ctx; + + hrt.quadd_ctx = ctx; + hrt.active = 0; + + freq = ctx->param.freq; + freq = max_t(long, QUADD_HRT_MIN_FREQ, freq); + period = NSEC_PER_SEC / freq; + hrt.sample_period 
= period; + + if (ctx->param.ma_freq > 0) + hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq; + else + hrt.ma_period = 0; + + atomic64_set(&hrt.counter_samples, 0); + + hrt.cpu_ctx = alloc_percpu(struct quadd_cpu_context); + if (!hrt.cpu_ctx) + return NULL; + + for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) { + cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id); + + atomic_set(&cpu_ctx->nr_active, 0); + + cpu_ctx->active_thread.pid = -1; + cpu_ctx->active_thread.tgid = -1; + + init_hrtimer(cpu_ctx); + } + + if (init_instr()) + return NULL; + + return &hrt; +} diff --git a/drivers/misc/tegra-profiler/hrt.h b/drivers/misc/tegra-profiler/hrt.h new file mode 100644 index 000000000000..f113b8846a02 --- /dev/null +++ b/drivers/misc/tegra-profiler/hrt.h @@ -0,0 +1,94 @@ +/* + * drivers/misc/tegra-profiler/hrt.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#ifndef __QUADD_HRT_H +#define __QUADD_HRT_H + +#define QUADD_MAX_STACK_DEPTH 64 + +#ifdef __KERNEL__ + +#include <linux/hrtimer.h> +#include <linux/limits.h> +#include <linux/kprobes.h> + +#include "backtrace.h" + +#define QUADD_USE_CORRECT_SAMPLE_TS 1 + +struct quadd_thread_data { + pid_t pid; + pid_t tgid; +}; + +struct quadd_cpu_context { + struct hrtimer hrtimer; + + struct quadd_callchain callchain_data; + char mmap_filename[PATH_MAX]; + + struct quadd_thread_data active_thread; + atomic_t nr_active; + +#ifdef QUADD_USE_CORRECT_SAMPLE_TS + u64 prev_time; + u64 current_time; +#endif +}; + +struct quadd_hrt_ctx { + struct quadd_cpu_context * __percpu cpu_ctx; + u64 sample_period; + + struct kprobe kp_in; + /* struct kinstr ki_out; */ + + struct quadd_ctx *quadd_ctx; + + int active; + atomic64_t counter_samples; + atomic_t nr_active_all_core; + + struct timer_list ma_timer; + unsigned int ma_period; + + unsigned long vm_size_prev; + unsigned long rss_size_prev; +}; + +#define QUADD_HRT_MIN_FREQ 110 + +#define QUADD_U32_MAX (~(__u32)0) + +struct quadd_hrt_ctx; +struct quadd_record_data; +struct quadd_module_state; + +struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx); +void quadd_hrt_deinit(void); + +int quadd_hrt_start(void); +void quadd_hrt_stop(void); + +void quadd_put_sample(struct quadd_record_data *data, + char *extra_data, unsigned int extra_length); + +void quadd_hrt_get_state(struct quadd_module_state *state); +u64 quadd_get_time(void); + +#endif /* __KERNEL__ */ + +#endif /* __QUADD_HRT_H */ diff --git a/drivers/misc/tegra-profiler/ma.c b/drivers/misc/tegra-profiler/ma.c new file mode 100644 index 000000000000..2021954ace58 --- /dev/null +++ b/drivers/misc/tegra-profiler/ma.c @@ -0,0 +1,132 @@ +/* + * drivers/misc/tegra-profiler/ma.c + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/timer.h> + +#include <linux/tegra_profiler.h> + +#include "ma.h" +#include "quadd.h" +#include "hrt.h" +#include "comm.h" +#include "debug.h" + +static void make_sample(struct quadd_hrt_ctx *hrt_ctx, + pid_t pid, unsigned long vm_size, + unsigned long rss_size) +{ + struct quadd_record_data record; + struct quadd_ma_data *ma = &record.ma; + struct quadd_comm_data_interface *comm = hrt_ctx->quadd_ctx->comm; + + record.magic = QUADD_RECORD_MAGIC; + record.record_type = QUADD_RECORD_TYPE_MA; + record.cpu_mode = QUADD_CPU_MODE_NONE; + + ma->pid = pid; + ma->time = quadd_get_time(); + + ma->vm_size = vm_size << PAGE_SHIFT; + ma->rss_size = rss_size << PAGE_SHIFT; +/* + pr_debug("vm: %llu bytes (%llu mb), rss: %llu bytes (%llu mb)\n", + ma->vm_size, ma->vm_size / 0x100000, + ma->rss_size, ma->rss_size / 0x100000); +*/ + comm->put_sample(&record, NULL, 0); +} + +static void check_ma(struct quadd_hrt_ctx *hrt_ctx) +{ + pid_t pid; + struct pid *pid_s; + struct task_struct *task = NULL; + struct mm_struct *mm; + struct quadd_ctx *quadd_ctx = hrt_ctx->quadd_ctx; + unsigned long vm_size, rss_size, total_vm; + + pid = quadd_ctx->param.pids[0]; + + rcu_read_lock(); + pid_s = find_vpid(pid); + if (pid_s) + task = pid_task(pid_s, PIDTYPE_PID); + rcu_read_unlock(); + if (!task) + return; + + mm = task->mm; + if (!mm) + return; + + total_vm = mm->total_vm; + vm_size = total_vm - mm->reserved_vm; + rss_size = 
get_mm_rss(mm); + + if (vm_size != hrt_ctx->vm_size_prev || + rss_size != hrt_ctx->rss_size_prev) { + make_sample(hrt_ctx, pid, vm_size, rss_size); + hrt_ctx->vm_size_prev = vm_size; + hrt_ctx->rss_size_prev = rss_size; + } +} + +static void timer_interrupt(unsigned long data) +{ + struct quadd_hrt_ctx *hrt_ctx = (struct quadd_hrt_ctx *)data; + struct timer_list *timer = &hrt_ctx->ma_timer; + + if (hrt_ctx->active == 0) + return; + + check_ma(hrt_ctx); + + timer->expires = jiffies + msecs_to_jiffies(hrt_ctx->ma_period); + add_timer(timer); +} + +void quadd_ma_start(struct quadd_hrt_ctx *hrt_ctx) +{ + struct timer_list *timer = &hrt_ctx->ma_timer; + + if (hrt_ctx->ma_period == 0) { + pr_info("QuadD MA is disabled\n"); + return; + } + pr_info("QuadD MA is started, interval: %u msec\n", + hrt_ctx->ma_period); + + hrt_ctx->vm_size_prev = 0; + hrt_ctx->rss_size_prev = 0; + + init_timer(timer); + timer->function = timer_interrupt; + timer->expires = jiffies + msecs_to_jiffies(hrt_ctx->ma_period); + timer->data = (unsigned long)hrt_ctx; + add_timer(timer); +} + +void quadd_ma_stop(struct quadd_hrt_ctx *hrt_ctx) +{ + if (hrt_ctx->ma_period > 0) { + pr_info("QuadD MA is stopped\n"); + del_timer_sync(&hrt_ctx->ma_timer); + } +} diff --git a/drivers/misc/tegra-profiler/ma.h b/drivers/misc/tegra-profiler/ma.h new file mode 100644 index 000000000000..be892b650927 --- /dev/null +++ b/drivers/misc/tegra-profiler/ma.h @@ -0,0 +1,25 @@ +/* + * drivers/misc/tegra-profiler/ma.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + */ + +#ifndef __QUADD_MA_H +#define __QUADD_MA_H + +struct quadd_hrt_ctx; + +void quadd_ma_start(struct quadd_hrt_ctx *hrt_ctx); +void quadd_ma_stop(struct quadd_hrt_ctx *hrt_ctx); + +#endif /* __QUADD_MA_H */ diff --git a/drivers/misc/tegra-profiler/main.c b/drivers/misc/tegra-profiler/main.c new file mode 100644 index 000000000000..71b9554e92d3 --- /dev/null +++ b/drivers/misc/tegra-profiler/main.c @@ -0,0 +1,471 @@ +/* + * drivers/misc/tegra-profiler/main.c + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> + +#include <linux/tegra_profiler.h> + +#include "quadd.h" +#include "armv7_pmu.h" +#include "hrt.h" +#include "pl310.h" +#include "comm.h" +#include "mmap.h" +#include "debug.h" +#include "tegra.h" +#include "power_clk.h" +#include "auth.h" +#include "version.h" + +static struct quadd_ctx ctx; + +static int get_default_properties(void) +{ + ctx.param.freq = 100; + ctx.param.ma_freq = 50; + ctx.param.backtrace = 1; + ctx.param.use_freq = 1; + ctx.param.system_wide = 1; + ctx.param.power_rate_freq = 0; + ctx.param.debug_samples = 0; + + ctx.param.pids[0] = 0; + ctx.param.nr_pids = 1; + + return 0; +} + +static int start(void) +{ + int err; + + if (!atomic_cmpxchg(&ctx.started, 0, 1)) { + if (ctx.pmu) { + err = ctx.pmu->enable(); + if (err) { + pr_err("error: pmu enable\n"); + return err; + } + } + + if (ctx.pl310) { + err = ctx.pl310->enable(); + if (err) { + pr_err("error: pl310 enable\n"); + return err; + } + } + + quadd_mmap_reset(); + ctx.comm->reset(); + + err = quadd_power_clk_start(); + if (err < 0) { + pr_err("error: power_clk start\n"); + return err; + } + + err = quadd_hrt_start(); + if (err) { + pr_err("error: hrt start\n"); + return err; + } + } + + return 0; +} + +static void stop(void) +{ + if (atomic_cmpxchg(&ctx.started, 1, 0)) { + quadd_hrt_stop(); + + quadd_mmap_reset(); + ctx.comm->reset(); + + quadd_power_clk_stop(); + + if (ctx.pmu) + ctx.pmu->disable(); + + if (ctx.pl310) + ctx.pl310->disable(); + } +} + +static inline int is_event_supported(struct source_info *si, int event) +{ + int i; + int nr = si->nr_supported_events; + int *events = si->supported_events; + + for (i = 0; i < nr; i++) { + if (event == events[i]) + return 1; + } + return 0; +} + +static int set_parameters(struct quadd_parameters *param, uid_t *debug_app_uid) +{ + int i, err; + int pmu_events_id[QUADD_MAX_COUNTERS]; + int pl310_events_id; + int nr_pmu = 0, nr_pl310 = 
0; + int uid = 0; + struct task_struct *task; + + if (ctx.param.freq != 100 && ctx.param.freq != 1000 && + ctx.param.freq != 10000) + return -EINVAL; + + ctx.param.freq = param->freq; + ctx.param.ma_freq = param->ma_freq; + ctx.param.backtrace = param->backtrace; + ctx.param.use_freq = param->use_freq; + ctx.param.system_wide = param->system_wide; + ctx.param.power_rate_freq = param->power_rate_freq; + ctx.param.debug_samples = param->debug_samples; + + /* Currently only one process */ + if (param->nr_pids != 1) + return -EINVAL; + + rcu_read_lock(); + task = pid_task(find_vpid(param->pids[0]), PIDTYPE_PID); + rcu_read_unlock(); + if (!task) { + pr_err("Process not found: %u\n", param->pids[0]); + return -ESRCH; + } + + pr_info("owner/task uids: %u/%u\n", current_fsuid(), task_uid(task)); + if (!capable(CAP_SYS_ADMIN)) { + if (current_fsuid() != task_uid(task)) { + uid = quadd_auth_check_debug_flag(param->package_name); + if (uid < 0) { + pr_err("Error: QuadD security service\n"); + return uid; + } else if (uid == 0) { + pr_err("Error: app is not debuggable\n"); + return -EACCES; + } + + *debug_app_uid = uid; + pr_info("debug_app_uid: %u\n", uid); + } + } + + for (i = 0; i < param->nr_pids; i++) + ctx.param.pids[i] = param->pids[i]; + + ctx.param.nr_pids = param->nr_pids; + + for (i = 0; i < param->nr_events; i++) { + int event = param->events[i]; + + if (ctx.pmu && ctx.pmu_info.nr_supported_events > 0 + && is_event_supported(&ctx.pmu_info, event)) { + pmu_events_id[nr_pmu++] = param->events[i]; + + pr_info("PMU active event: %s\n", + quadd_get_event_str(event)); + } else if (ctx.pl310 && + ctx.pl310_info.nr_supported_events > 0 && + is_event_supported(&ctx.pl310_info, event)) { + pl310_events_id = param->events[i]; + + pr_info("PL310 active event: %s\n", + quadd_get_event_str(event)); + + if (nr_pl310++ > 1) { + pr_err("error: multiply pl310 events\n"); + return -EINVAL; + } + } else { + pr_err("Bad event: %s\n", + quadd_get_event_str(event)); + return -EINVAL; + 
} + } + + if (ctx.pmu) { + if (nr_pmu > 0) { + err = ctx.pmu->set_events(pmu_events_id, nr_pmu); + if (err) { + pr_err("PMU set parameters: error\n"); + return err; + } + ctx.pmu_info.active = 1; + } else { + ctx.pmu_info.active = 0; + ctx.pmu->set_events(NULL, 0); + } + } + + if (ctx.pl310) { + if (nr_pl310 == 1) { + err = ctx.pl310->set_events(&pl310_events_id, 1); + if (err) { + pr_info("pl310 set_parameters: error\n"); + return err; + } + ctx.pl310_info.active = 1; + } else { + ctx.pl310_info.active = 0; + ctx.pl310->set_events(NULL, 0); + } + } + pr_info("New parameters have been applied\n"); + + return 0; +} + +static void get_capabilities(struct quadd_comm_cap *cap) +{ + int i, event; + struct quadd_events_cap *events_cap = &cap->events_cap; + + cap->pmu = ctx.pmu ? 1 : 0; + + cap->l2_cache = 0; + if (ctx.pl310) { + cap->l2_cache = 1; + cap->l2_multiple_events = 0; + } else if (ctx.pmu) { + struct source_info *s = &ctx.pmu_info; + for (i = 0; i < s->nr_supported_events; i++) { + event = s->supported_events[i]; + if (event == QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES || + event == QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES || + event == QUADD_EVENT_TYPE_L2_ICACHE_MISSES) { + cap->l2_cache = 1; + cap->l2_multiple_events = 1; + break; + } + } + } + + events_cap->cpu_cycles = 0; + events_cap->l1_dcache_read_misses = 0; + events_cap->l1_dcache_write_misses = 0; + events_cap->l1_icache_misses = 0; + + events_cap->instructions = 0; + events_cap->branch_instructions = 0; + events_cap->branch_misses = 0; + events_cap->bus_cycles = 0; + + events_cap->l2_dcache_read_misses = 0; + events_cap->l2_dcache_write_misses = 0; + events_cap->l2_icache_misses = 0; + + if (ctx.pl310) { + struct source_info *s = &ctx.pl310_info; + for (i = 0; i < s->nr_supported_events; i++) { + int event = s->supported_events[i]; + + switch (event) { + case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES: + events_cap->l2_dcache_read_misses = 1; + break; + case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES: + 
events_cap->l2_dcache_write_misses = 1; + break; + case QUADD_EVENT_TYPE_L2_ICACHE_MISSES: + events_cap->l2_icache_misses = 1; + break; + + default: + BUG(); + break; + } + } + } + + if (ctx.pmu) { + struct source_info *s = &ctx.pmu_info; + for (i = 0; i < s->nr_supported_events; i++) { + int event = s->supported_events[i]; + + switch (event) { + case QUADD_EVENT_TYPE_CPU_CYCLES: + events_cap->cpu_cycles = 1; + break; + case QUADD_EVENT_TYPE_INSTRUCTIONS: + events_cap->instructions = 1; + break; + case QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS: + events_cap->branch_instructions = 1; + break; + case QUADD_EVENT_TYPE_BRANCH_MISSES: + events_cap->branch_misses = 1; + break; + case QUADD_EVENT_TYPE_BUS_CYCLES: + events_cap->bus_cycles = 1; + break; + + case QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES: + events_cap->l1_dcache_read_misses = 1; + break; + case QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES: + events_cap->l1_dcache_write_misses = 1; + break; + case QUADD_EVENT_TYPE_L1_ICACHE_MISSES: + events_cap->l1_icache_misses = 1; + break; + + case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES: + events_cap->l2_dcache_read_misses = 1; + break; + case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES: + events_cap->l2_dcache_write_misses = 1; + break; + case QUADD_EVENT_TYPE_L2_ICACHE_MISSES: + events_cap->l2_icache_misses = 1; + break; + + default: + BUG(); + break; + } + } + } + + cap->tegra_lp_cluster = quadd_is_cpu_with_lp_cluster(); + cap->power_rate = 1; + cap->blocked_read = 0; +} + +static void get_state(struct quadd_module_state *state) +{ + quadd_hrt_get_state(state); +} + +static struct quadd_comm_control_interface control = { + .start = start, + .stop = stop, + .set_parameters = set_parameters, + .get_capabilities = get_capabilities, + .get_state = get_state, +}; + +static int __init quadd_module_init(void) +{ + int i, nr_events, err; + int *events; + + pr_info("Branch: %s\n", QUADD_MODULE_BRANCH); + pr_info("Version: %s\n", QUADD_MODULE_VERSION); + pr_info("Samples version: %d\n", 
QUADD_SAMPLES_VERSION); + pr_info("IO version: %d\n", QUADD_IO_VERSION); + +#ifdef QM_DEBUG_SAMPLES_ENABLE + pr_info("############## DEBUG VERSION! ##############\n"); +#endif + atomic_set(&ctx.started, 0); + + get_default_properties(); + + ctx.pmu_info.active = 0; + ctx.pl310_info.active = 0; + + ctx.pmu = quadd_armv7_pmu_init(); + if (!ctx.pmu) { + pr_err("PMU init failed\n"); + return -ENODEV; + } else { + events = ctx.pmu_info.supported_events; + nr_events = ctx.pmu->get_supported_events(events); + ctx.pmu_info.nr_supported_events = nr_events; + + pr_info("PMU: amount of events: %d\n", nr_events); + + for (i = 0; i < nr_events; i++) + pr_info("PMU event: %s\n", + quadd_get_event_str(events[i])); + } + + ctx.pl310 = quadd_l2x0_events_init(); + if (ctx.pl310) { + events = ctx.pl310_info.supported_events; + nr_events = ctx.pl310->get_supported_events(events); + ctx.pl310_info.nr_supported_events = nr_events; + + pr_info("pl310 success, amount of events: %d\n", + nr_events); + + for (i = 0; i < nr_events; i++) + pr_info("pl310 event: %s\n", + quadd_get_event_str(events[i])); + } else { + pr_info("PL310 not found\n"); + } + + ctx.hrt = quadd_hrt_init(&ctx); + if (!ctx.hrt) { + pr_err("error: HRT init failed\n"); + return -ENODEV; + } + + ctx.mmap = quadd_mmap_init(&ctx); + if (!ctx.mmap) { + pr_err("error: MMAP init failed\n"); + return -ENODEV; + } + + err = quadd_power_clk_init(&ctx); + if (err < 0) { + pr_err("error: POWER CLK init failed\n"); + return err; + } + + ctx.comm = quadd_comm_events_init(&control); + if (!ctx.comm) { + pr_err("error: COMM init failed\n"); + return -ENODEV; + } + + err = quadd_auth_init(&ctx); + if (err < 0) { + pr_err("error: auth failed\n"); + return err; + } + + return 0; +} + +static void __exit quadd_module_exit(void) +{ + pr_info("QuadD module exit\n"); + + quadd_hrt_deinit(); + quadd_mmap_deinit(); + quadd_power_clk_deinit(); + quadd_comm_events_exit(); + quadd_auth_deinit(); +} + +module_init(quadd_module_init); 
+module_exit(quadd_module_exit); + +MODULE_LICENSE("GPL"); + +MODULE_AUTHOR("Nvidia Ltd"); +MODULE_DESCRIPTION("Tegra profiler"); diff --git a/drivers/misc/tegra-profiler/mmap.c b/drivers/misc/tegra-profiler/mmap.c new file mode 100644 index 000000000000..a52b11f74cd2 --- /dev/null +++ b/drivers/misc/tegra-profiler/mmap.c @@ -0,0 +1,263 @@ +/* + * drivers/misc/tegra-profiler/mmap.c + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/crc32.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/slab.h> + +#include <linux/tegra_profiler.h> + +#include "mmap.h" +#include "hrt.h" +#include "debug.h" + +static struct quadd_mmap_ctx mmap_ctx; + +/* + * Insert @key into the ascending array @array that holds @length entries. + * The caller must leave room for one more entry. + * Returns 1 if @key was inserted, 0 if it was already present. + */ +static int binary_search_and_add(unsigned int *array, + unsigned int length, unsigned int key) +{ + unsigned int i_min, i_max, mid; + + if (length == 0) { + array[0] = key; + return 1; + } else if (length == 1 && array[0] == key) { + return 0; + } + + i_min = 0; + i_max = length; + + if (array[0] > key) { + memmove((char *)((unsigned int *)array + 1), array, + length * sizeof(unsigned int)); + array[0] = key; + return 1; + } else if (array[length - 1] < key) { + array[length] = key; + return 1; + } + + while (i_min < i_max) { + mid = i_min + (i_max - i_min) / 2; + + if (key <= array[mid]) + i_max = mid; + else + i_min = mid + 1; + } + + if (array[i_max] == key) { + return 0; + } else { + memmove((char *)((unsigned int *)array + i_max + 1), + (char 
*)((unsigned int *)array + i_max), + (length - i_max) * sizeof(unsigned int)); + array[i_max] = key; + return 1; + } +} + +/* + * Record @key in mmap_ctx.hash_array under mmap_ctx.lock. + * Returns 0 if @key is new, 1 if it was seen before or the table is full. + */ +static int check_hash(u32 key) +{ + int res; + unsigned long flags; + + spin_lock_irqsave(&mmap_ctx.lock, flags); + + if (mmap_ctx.nr_hashes >= QUADD_MMAP_SIZE_ARRAY) { + spin_unlock_irqrestore(&mmap_ctx.lock, flags); + return 1; + } + + res = binary_search_and_add(mmap_ctx.hash_array, + mmap_ctx.nr_hashes, key); + if (res > 0) { + mmap_ctx.nr_hashes++; + spin_unlock_irqrestore(&mmap_ctx.lock, flags); + return 0; + } + + spin_unlock_irqrestore(&mmap_ctx.lock, flags); + return 1; +} + +/* + * Find the file mapping (user mode) or module (kernel mode) that contains + * the instruction pointer of @regs and fill @sample with its range. + * Returns the name copied into @cpu_ctx->mmap_filename, with @extra_length + * set to the name size (including NUL) rounded up to 8 bytes, when + * check_hash() reports the name/range as new; otherwise returns NULL with + * @extra_length set to 0. + */ +char *quadd_get_mmap(struct quadd_cpu_context *cpu_ctx, + struct pt_regs *regs, struct quadd_mmap_data *sample, + unsigned int *extra_length) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct file *vm_file; + struct path *path; + char *file_name = NULL; + int length, length_aligned; + u32 crc; + unsigned long ip; + + if (!mm) { + *extra_length = 0; + return NULL; + } + + ip = instruction_pointer(regs); + + if (user_mode(regs)) { + for (vma = find_vma(mm, ip); vma; vma = vma->vm_next) { + if (ip < vma->vm_start || ip >= vma->vm_end) + continue; + + vm_file = vma->vm_file; + if (!vm_file) + break; + + path = &vm_file->f_path; + + file_name = d_path(path, mmap_ctx.tmp_buf, PATH_MAX); + if (IS_ERR_OR_NULL(file_name)) { + /* d_path() fails with ERR_PTR(), not NULL */ + file_name = NULL; + } else { + sample->addr = vma->vm_start; + sample->len = vma->vm_end - vma->vm_start; + sample->pgoff = + (u64)vma->vm_pgoff << PAGE_SHIFT; + } + break; + } + } else { + struct module *mod; + + preempt_disable(); + mod = __module_address(ip); + preempt_enable(); + + if (mod) { + file_name = mod->name; + if (file_name) { + sample->addr = (u32) mod->module_core; + sample->len = mod->core_size; + sample->pgoff = 0; + } + } + } + + if (file_name) { + length = strlen(file_name); + if (length >= PATH_MAX) { + *extra_length = 0; + return NULL; + } + + crc = crc32_le(~0, file_name, length); + crc = crc32_le(crc, (unsigned char *)&sample->addr, + 
sizeof(sample->addr)); + crc = crc32_le(crc, (unsigned char *)&sample->len, + sizeof(sample->len)); + + if (!check_hash(crc)) { + strcpy(cpu_ctx->mmap_filename, file_name); + length_aligned = (length + 1 + 7) & (~7); + *extra_length = length_aligned; + + return cpu_ctx->mmap_filename; + } + } + + *extra_length = 0; + return NULL; +} + +/* + * Allocate the hash table and the d_path() scratch buffer of mmap_ctx. + * Returns &mmap_ctx on success, NULL if an allocation fails. + */ +struct quadd_mmap_ctx *quadd_mmap_init(struct quadd_ctx *quadd_ctx) +{ + u32 *hash; + char *tmp; + + mmap_ctx.quadd_ctx = quadd_ctx; + + hash = kzalloc(QUADD_MMAP_SIZE_ARRAY * sizeof(unsigned int), + GFP_KERNEL); + if (!hash) { + pr_err("Alloc error\n"); + return NULL; + } + mmap_ctx.hash_array = hash; + + mmap_ctx.nr_hashes = 0; + spin_lock_init(&mmap_ctx.lock); + + tmp = kzalloc(PATH_MAX + sizeof(unsigned long long), + GFP_KERNEL); + if (!tmp) { + pr_err("Alloc error\n"); + /* do not leak the hash table allocated above */ + kfree(hash); + mmap_ctx.hash_array = NULL; + return NULL; + } + mmap_ctx.tmp_buf = tmp; + + return &mmap_ctx; +} + +void quadd_mmap_reset(void) +{ + unsigned long flags; + + spin_lock_irqsave(&mmap_ctx.lock, flags); + mmap_ctx.nr_hashes = 0; + spin_unlock_irqrestore(&mmap_ctx.lock, flags); +} + +void quadd_mmap_deinit(void) +{ + unsigned long flags; + + spin_lock_irqsave(&mmap_ctx.lock, flags); + + kfree(mmap_ctx.hash_array); + mmap_ctx.hash_array = NULL; + + kfree(mmap_ctx.tmp_buf); + mmap_ctx.tmp_buf = NULL; + + spin_unlock_irqrestore(&mmap_ctx.lock, flags); +} diff --git a/drivers/misc/tegra-profiler/mmap.h b/drivers/misc/tegra-profiler/mmap.h new file mode 100644 index 000000000000..f12ec4d61ed5 --- /dev/null +++ b/drivers/misc/tegra-profiler/mmap.h @@ -0,0 +1,47 @@ +/* + * drivers/misc/tegra-profiler/mmap.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __QUADD_MMAP_H +#define __QUADD_MMAP_H + +#include <linux/types.h> + +struct quadd_cpu_context; +struct quadd_ctx; +struct quadd_mmap_data; + +#define QUADD_MMAP_SIZE_ARRAY 4096 + +struct quadd_mmap_ctx { + u32 *hash_array; + unsigned int nr_hashes; + spinlock_t lock; + + char *tmp_buf; + + struct quadd_ctx *quadd_ctx; +}; + +char *quadd_get_mmap(struct quadd_cpu_context *cpu_ctx, + struct pt_regs *regs, struct quadd_mmap_data *sample, + unsigned int *extra_length); + + +struct quadd_mmap_ctx *quadd_mmap_init(struct quadd_ctx *quadd_ctx); +void quadd_mmap_deinit(void); +void quadd_mmap_reset(void); + +#endif /* __QUADD_MMAP_H */ diff --git a/drivers/misc/tegra-profiler/pl310.c b/drivers/misc/tegra-profiler/pl310.c new file mode 100644 index 000000000000..010830823817 --- /dev/null +++ b/drivers/misc/tegra-profiler/pl310.c @@ -0,0 +1,317 @@ +/* + * drivers/misc/tegra-profiler/pl310.c + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/io.h> +#include <asm/hardware/cache-l2x0.h> + +#include <linux/tegra_profiler.h> + +#include "quadd.h" +#include "pl310.h" +#include "debug.h" + +DEFINE_PER_CPU(u32, pl310_prev_val); + +static struct l2x0_context l2x0_ctx; + +static void l2x0_enable_event_counters(u32 event0, u32 event1) +{ + u32 reg_val; + void __iomem *base = l2x0_ctx.l2x0_base; + + /* configure counter0 */ + reg_val = event0; + writel_relaxed(reg_val, base + L2X0_EVENT_CNT0_CFG); + + /* configure counter1 */ + reg_val = event1; + writel_relaxed(reg_val, base + L2X0_EVENT_CNT1_CFG); + + /* enable event counting */ + reg_val = L2X0_EVENT_CNT_ENABLE; + writel_relaxed(reg_val, base + L2X0_EVENT_CNT_CTRL); +} + +static void __maybe_unused l2x0_disable_event_counters(void) +{ + u32 reg_val; + void __iomem *base = l2x0_ctx.l2x0_base; + + /* disable event counting */ + reg_val = 0; + writel_relaxed(reg_val, base + L2X0_EVENT_CNT_CTRL); +} + +static void l2x0_stop_event_counters(void) +{ + void __iomem *base = l2x0_ctx.l2x0_base; + + writel_relaxed(0, base + L2X0_EVENT_CNT_CTRL); + + writel_relaxed(0, base + L2X0_EVENT_CNT0_CFG); + writel_relaxed(0, base + L2X0_EVENT_CNT1_CFG); +} + +static void l2x0_reset_event_counters(void) +{ + u32 reg_val; + void __iomem *base = l2x0_ctx.l2x0_base; + + reg_val = readl_relaxed(base + L2X0_EVENT_CNT_CTRL); + reg_val |= L2X0_EVENT_CNT_RESET_CNT0 | L2X0_EVENT_CNT_RESET_CNT1; + writel_relaxed(reg_val, base + L2X0_EVENT_CNT_CTRL); +} + +static u32 l2x0_read_event_counter(enum quadd_l2x0_counter counter) +{ + u32 reg_val = 0; + void __iomem *base = l2x0_ctx.l2x0_base; + + switch (counter) { + case QUADD_L2X0_COUNTER0: + reg_val = readl_relaxed(base + L2X0_EVENT_CNT0_VAL); + break; + case QUADD_L2X0_COUNTER1: + reg_val = readl_relaxed(base + L2X0_EVENT_CNT1_VAL); + break; + } + + return reg_val; +} + +static void l2x0_enable_perf_event(enum 
quadd_l2x0_event_type type) +{ + l2x0_reset_event_counters(); + + switch (type) { + case QUADD_L2X0_TYPE_DATA_READ_MISSES: + l2x0_enable_event_counters(L2X0_EVENT_CNT_CFG_DRREQ, + L2X0_EVENT_CNT_CFG_DRHIT); + break; + case QUADD_L2X0_TYPE_DATA_WRITE_MISSES: + l2x0_enable_event_counters(L2X0_EVENT_CNT_CFG_DWREQ, + L2X0_EVENT_CNT_CFG_DWHIT); + break; + case QUADD_L2X0_TYPE_INSTRUCTION_MISSES: + l2x0_enable_event_counters(L2X0_EVENT_CNT_CFG_IRREQ, + L2X0_EVENT_CNT_CFG_IRHIT); + break; + } +} + +static u32 l2x0_read_perf_event(void) +{ + u32 count_req, count_hit, count_miss; + + count_req = l2x0_read_event_counter(QUADD_L2X0_COUNTER0); + count_hit = l2x0_read_event_counter(QUADD_L2X0_COUNTER1); + + count_miss = count_req - count_hit; + if (count_req < count_hit) + return 0; + + return count_miss; +} + +static void l2x0_clear_values(void) +{ + int cpu_id; + for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) + per_cpu(pl310_prev_val, cpu_id) = 0; +} + +static int l2x0_events_enable(void) +{ + return 0; +} + +static void l2x0_events_disable(void) +{ +} + +static void l2x0_events_start(void) +{ + unsigned long flags; + + if (l2x0_ctx.l2x0_event_type < 0) + return; + + spin_lock_irqsave(&l2x0_ctx.lock, flags); + l2x0_clear_values(); + l2x0_enable_perf_event(l2x0_ctx.l2x0_event_type); + spin_unlock_irqrestore(&l2x0_ctx.lock, flags); + + qm_debug_start_source(QUADD_EVENT_SOURCE_PL310); +} + +static void l2x0_events_stop(void) +{ + unsigned long flags; + + if (l2x0_ctx.l2x0_event_type < 0) + return; + + spin_lock_irqsave(&l2x0_ctx.lock, flags); + l2x0_stop_event_counters(); + l2x0_clear_values(); + spin_unlock_irqrestore(&l2x0_ctx.lock, flags); + + qm_debug_stop_source(QUADD_EVENT_SOURCE_PL310); +} + +static int __maybe_unused l2x0_events_read(struct event_data *events) +{ + unsigned long flags; + + if (l2x0_ctx.l2x0_event_type < 0) { + pr_err_once("pl310 value: %u\n", events[0].val); + return 0; + } + + events[0].event_source = QUADD_EVENT_SOURCE_PL310; + events[0].event_id = 
l2x0_ctx.event_id; + + spin_lock_irqsave(&l2x0_ctx.lock, flags); + events[0].val = l2x0_read_perf_event(); + spin_unlock_irqrestore(&l2x0_ctx.lock, flags); + + events[0].prev_val = __get_cpu_var(pl310_prev_val); + + __get_cpu_var(pl310_prev_val) = events[0].val; + + qm_debug_read_counter(l2x0_ctx.event_id, events[0].prev_val, + events[0].val); + + return 1; +} + +static int __maybe_unused l2x0_events_read_emulate(struct event_data *events) +{ + static u32 val; + + if (val > 100) + val = 0; + + events[0].event_source = QUADD_EVENT_SOURCE_PL310; + events[0].event_id = QUADD_L2X0_TYPE_DATA_READ_MISSES; + + events[0].val = val; + events[0].prev_val = __get_cpu_var(pl310_prev_val); + + __get_cpu_var(pl310_prev_val) = val; + + val += 10; + + return 1; +} + +static int l2x0_set_events(int *events, int size) +{ + if (!events || size == 0) { + l2x0_ctx.l2x0_event_type = -1; + l2x0_ctx.event_id = -1; + return 0; + } + + if (size != 1) { + pr_err("Error: number of events more than one\n"); + return -ENOSPC; + } + + switch (*events) { + case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES: + l2x0_ctx.l2x0_event_type = QUADD_L2X0_TYPE_DATA_READ_MISSES; + break; + case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES: + l2x0_ctx.l2x0_event_type = QUADD_L2X0_TYPE_DATA_WRITE_MISSES; + break; + case QUADD_EVENT_TYPE_L2_ICACHE_MISSES: + l2x0_ctx.l2x0_event_type = QUADD_L2X0_TYPE_INSTRUCTION_MISSES; + break; + default: + pr_err("Error event: %s\n", quadd_get_event_str(*events)); + return 1; + } + l2x0_ctx.event_id = *events; + + pr_info("Event has been added: id/l2x0: %s/%#x\n", + quadd_get_event_str(*events), l2x0_ctx.l2x0_event_type); + return 0; +} + +static int get_supported_events(int *events) +{ + events[0] = QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES; + events[1] = QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES; + events[2] = QUADD_EVENT_TYPE_L2_ICACHE_MISSES; + return 3; +} + +static struct quadd_event_source_interface l2x0_int = { + .enable = l2x0_events_enable, + .disable = l2x0_events_disable, + + 
.start = l2x0_events_start, + .stop = l2x0_events_stop, + +#ifndef QUADD_USE_EMULATE_COUNTERS + .read = l2x0_events_read, +#else + .read = l2x0_events_read_emulate, +#endif + .set_events = l2x0_set_events, + .get_supported_events = get_supported_events, +}; + +struct quadd_event_source_interface *quadd_l2x0_events_init(void) +{ + void __iomem *base; + unsigned long phys_addr; + + l2x0_ctx.l2x0_event_type = -1; + l2x0_ctx.event_id = -1; + + l2x0_ctx.l2x0_base = NULL; + + phys_addr = quadd_get_pl310_phys_addr(); + if (!phys_addr) + return NULL; + + base = ioremap(phys_addr, SZ_4K); + if (base) { + u32 cache_id = readl(base + L2X0_CACHE_ID); + + if ((cache_id & 0xff0003c0) != 0x410000c0) { + iounmap(base); + return NULL; + } + } + + if (!base) + return NULL; + + l2x0_ctx.l2x0_base = base; + + l2x0_clear_values(); + spin_lock_init(&l2x0_ctx.lock); + + pr_debug("pl310 init success, l2x0_base: %p\n", base); + return &l2x0_int; +} diff --git a/drivers/misc/tegra-profiler/pl310.h b/drivers/misc/tegra-profiler/pl310.h new file mode 100644 index 000000000000..96e60bfea9db --- /dev/null +++ b/drivers/misc/tegra-profiler/pl310.h @@ -0,0 +1,79 @@ +/* + * drivers/misc/tegra-profiler/pl310.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#ifndef __QUADD_PL310_H +#define __QUADD_PL310_H + +/* + * l2x0 event type + */ +enum quadd_l2x0_event_type { + QUADD_L2X0_TYPE_DATA_READ_MISSES = 0, + QUADD_L2X0_TYPE_DATA_WRITE_MISSES = 1, + QUADD_L2X0_TYPE_INSTRUCTION_MISSES = 2, +}; + +#ifdef __KERNEL__ + +#include <linux/io.h> + +#define L2X0_EVENT_CNT_ENABLE (1 << 0) +#define L2X0_EVENT_CNT_RESET_CNT0 (1 << 1) +#define L2X0_EVENT_CNT_RESET_CNT1 (2 << 1) + + +#define L2X0_EVENT_CNT_CFG_DRHIT (2 << 2) +#define L2X0_EVENT_CNT_CFG_DRREQ (3 << 2) + +#define L2X0_EVENT_CNT_CFG_DWHIT (4 << 2) +#define L2X0_EVENT_CNT_CFG_DWREQ (5 << 2) + +#define L2X0_EVENT_CNT_CFG_IRHIT (7 << 2) +#define L2X0_EVENT_CNT_CFG_IRREQ (8 << 2) + +/* + * l2x0 counters + */ +enum quadd_l2x0_counter { + QUADD_L2X0_COUNTER1 = 0, + QUADD_L2X0_COUNTER0 = 1, +}; + +struct l2x0_context { + int l2x0_event_type; + int event_id; + + void __iomem *l2x0_base; + spinlock_t lock; +}; + +struct quadd_event_source_interface; + +struct quadd_event_source_interface *quadd_l2x0_events_init(void); + +static inline unsigned long quadd_get_pl310_phys_addr(void) +{ + unsigned long phys_addr = 0; + +#if defined(CONFIG_ARCH_TEGRA) + phys_addr = 0x50043000; +#endif + return phys_addr; +} + +#endif /* __KERNEL__ */ + +#endif /* __QUADD_PL310_H */ diff --git a/drivers/misc/tegra-profiler/power_clk.c b/drivers/misc/tegra-profiler/power_clk.c new file mode 100644 index 000000000000..b5b73f4afcea --- /dev/null +++ b/drivers/misc/tegra-profiler/power_clk.c @@ -0,0 +1,454 @@ +/* + * drivers/misc/tegra-profiler/power_clk.c + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/cpufreq.h> +#include <linux/clk.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/timer.h> + +#include <linux/tegra_profiler.h> + +#include "power_clk.h" +#include "quadd.h" +#include "hrt.h" +#include "comm.h" +#include "debug.h" + +#define POWER_CLK_MAX_VALUES 32 + +typedef int (*notifier_call_ft)(struct notifier_block *, unsigned long, void *); + +struct power_clk_data { + unsigned long value; + unsigned long prev; +}; + +struct power_clk_source { + int type; + + struct clk *clkp; + struct notifier_block nb; + + int nr; + struct power_clk_data data[POWER_CLK_MAX_VALUES]; + + unsigned long long counter; + atomic_t active; + + struct mutex lock; +}; + +struct power_clk_context_s { + struct power_clk_source cpu; + struct power_clk_source gpu; + struct power_clk_source emc; + + struct timer_list timer; + unsigned int period; + + struct quadd_ctx *quadd_ctx; +}; + +enum { + QUADD_POWER_CLK_CPU = 1, + QUADD_POWER_CLK_GPU, + QUADD_POWER_CLK_EMC, +}; + +static struct power_clk_context_s power_ctx; + +static void check_clks(void); + +static void read_source(struct power_clk_source *s) +{ + int i; + + mutex_lock(&s->lock); + + switch (s->type) { + case QUADD_POWER_CLK_CPU: + /* update cpu frequency */ + for (i = 0; i < nr_cpu_ids; i++) + s->data[i].value = cpufreq_get(i); + break; + + case QUADD_POWER_CLK_GPU: + /* update gpu frequency */ + s->clkp = clk_get_sys("3d", NULL); + if (s->clkp) { + s->data[0].value = + clk_get_rate(s->clkp) / 1000; + clk_put(s->clkp); + } + break; + + case QUADD_POWER_CLK_EMC: + /* update emc frequency */ + s->clkp = clk_get_sys("cpu", "emc"); + if (s->clkp) { + 
s->data[0].value = + clk_get_rate(s->clkp) / 1000; + clk_put(s->clkp); + } + break; + + default: + BUG(); + } + + mutex_unlock(&s->lock); + s->counter++; +} + +static int +gpu_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr) +{ + read_source(&power_ctx.gpu); + check_clks(); + + return 0; +} + +static int +emc_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr) +{ + read_source(&power_ctx.emc); + check_clks(); + + return 0; +} + +static int +cpu_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr) +{ + read_source(&power_ctx.cpu); + +#ifndef CONFIG_COMMON_CLK + read_source(&power_ctx.gpu); + read_source(&power_ctx.emc); +#endif + + check_clks(); + + return 0; +} + +static void make_sample(void) +{ + int i; + u32 extra_cpus[NR_CPUS]; + struct power_clk_source *s; + + struct quadd_record_data record; + struct quadd_power_rate_data *power_rate = &record.power_rate; + struct quadd_comm_data_interface *comm = power_ctx.quadd_ctx->comm; + + record.magic = QUADD_RECORD_MAGIC; + record.record_type = QUADD_RECORD_TYPE_POWER_RATE; + record.cpu_mode = QUADD_CPU_MODE_NONE; + + power_rate->time = quadd_get_time(); + + s = &power_ctx.cpu; + mutex_lock(&s->lock); + if (atomic_read(&s->active)) { + power_rate->nr_cpus = s->nr; + for (i = 0; i < s->nr; i++) + extra_cpus[i] = s->data[i].value; + } else { + power_rate->nr_cpus = 0; + } + mutex_unlock(&s->lock); + + s = &power_ctx.gpu; + mutex_lock(&s->lock); + if (atomic_read(&s->active)) + power_rate->gpu = s->data[0].value; + else + power_rate->gpu = 0; + + mutex_unlock(&s->lock); + + s = &power_ctx.emc; + mutex_lock(&s->lock); + if (atomic_read(&s->active)) + power_rate->emc = s->data[0].value; + else + power_rate->emc = 0; + + mutex_unlock(&s->lock); +/* + pr_debug("make_sample: cpu: %u/%u/%u/%u, gpu: %u, emc: %u\n", + extra_cpus[0], extra_cpus[1], extra_cpus[2], extra_cpus[3], + power_rate->gpu, power_rate->emc); +*/ + comm->put_sample(&record, (char *)extra_cpus, + 
power_rate->nr_cpus * sizeof(extra_cpus[0])); +} + +static inline int is_data_changed(struct power_clk_source *s) +{ + int i; + + mutex_lock(&s->lock); + for (i = 0; i < s->nr; i++) { + if (s->data[i].value != s->data[i].prev) { + mutex_unlock(&s->lock); + return 1; + } + } + mutex_unlock(&s->lock); + + return 0; +} + +static inline void update_data(struct power_clk_source *s) +{ + int i; + + mutex_lock(&s->lock); + + for (i = 0; i < s->nr; i++) + s->data[i].prev = s->data[i].value; + + mutex_unlock(&s->lock); +} + +static void check_clks(void) +{ + int changed = 0; + + if (is_data_changed(&power_ctx.cpu)) { + update_data(&power_ctx.cpu); + changed = 1; + } + + if (is_data_changed(&power_ctx.gpu)) { + update_data(&power_ctx.gpu); + changed = 1; + } + + if (is_data_changed(&power_ctx.emc)) { + update_data(&power_ctx.emc); + changed = 1; + } +/* + pr_debug("cpu: %lu/%lu/%lu/%lu, gpu: %lu, emc: %lu, changed: %s\n", + power_ctx.cpu.data[0].value, power_ctx.cpu.data[1].value, + power_ctx.cpu.data[2].value, power_ctx.cpu.data[3].value, + power_ctx.gpu.data[0].value, power_ctx.emc.data[0].value, + changed ? 
"yes" : "no"); +*/ + if (changed) + make_sample(); +} + +static void reset_data(struct power_clk_source *s) +{ + int i; + + mutex_lock(&s->lock); + for (i = 0; i < s->nr; i++) { + s->data[i].value = 0; + s->data[i].prev = 0; + } + atomic_set(&s->active, 0); /* inactive until (re)started */ + mutex_unlock(&s->lock); +} + +static void init_source(struct power_clk_source *s, + notifier_call_ft notifier, + int nr_values, + int type) +{ + s->type = type; + s->nb.notifier_call = notifier; + s->nr = nr_values; + + mutex_init(&s->lock); + reset_data(s); +} + +static void +power_clk_work_func(struct work_struct *dummy) +{ +#ifndef CONFIG_COMMON_CLK + read_source(&power_ctx.gpu); + read_source(&power_ctx.emc); + + check_clks(); +#endif +} + +static DECLARE_WORK(power_clk_work, power_clk_work_func); + +static void power_clk_timer(unsigned long data) +{ + struct timer_list *timer = &power_ctx.timer; + + schedule_work(&power_clk_work); + timer->expires = jiffies + msecs_to_jiffies(power_ctx.period); + add_timer(timer); +} + +int quadd_power_clk_is_enabled(int *period) +{ + struct quadd_parameters *param = &power_ctx.quadd_ctx->param; + + *period = power_ctx.period; + + if (param->power_rate_freq == 0) + return 0; + + return 1; +} + +int quadd_power_clk_start(void) +{ + struct power_clk_source *s; + int status; + struct timer_list *timer = &power_ctx.timer; + struct quadd_parameters *param = &power_ctx.quadd_ctx->param; + + if (param->power_rate_freq == 0) { + pr_info("power_clk is not started\n"); + return 0; + } + +#ifdef CONFIG_COMMON_CLK + power_ctx.period = 0; +#else + power_ctx.period = MSEC_PER_SEC / param->power_rate_freq; +#endif + pr_info("power_clk: start, freq: %d\n", + param->power_rate_freq); + + /* setup gpu frequency */ + s = &power_ctx.gpu; + s->clkp = clk_get_sys("3d", NULL); + if (s->clkp) { +#ifdef CONFIG_COMMON_CLK + status = clk_notifier_register(s->clkp, &s->nb); + if (status < 0) { + pr_err("error: could not setup gpu freq\n"); + return status; + } + clk_put(s->clkp); +#endif + reset_data(s); + 
atomic_set(&s->active, 1); + } else { + pr_err("error: could not setup gpu freq\n"); + atomic_set(&s->active, 0); + } + + /* setup emc frequency */ + s = &power_ctx.emc; + s->clkp = clk_get_sys("cpu", "emc"); + if (s->clkp) { +#ifdef CONFIG_COMMON_CLK + status = clk_notifier_register(s->clkp, &s->nb); + if (status < 0) { + pr_err("error: could not setup emc freq\n"); + return status; + } + clk_put(s->clkp); +#endif + reset_data(s); + atomic_set(&s->active, 1); + } else { + pr_err("error: could not setup emc freq\n"); + atomic_set(&s->active, 0); + } + + /* setup cpu frequency notifier */ + s = &power_ctx.cpu; + status = register_cpu_notifier(&s->nb); + if (status < 0) { + pr_err("error: could not setup cpu freq\n"); + return status; + } + reset_data(s); + + if (power_ctx.period > 0) { + init_timer(timer); + timer->function = power_clk_timer; + timer->expires = jiffies + msecs_to_jiffies(power_ctx.period); + timer->data = 0; + add_timer(timer); + } + + atomic_set(&s->active, 1); + + return 0; +} + +void quadd_power_clk_stop(void) +{ + struct power_clk_source *s; + + if (power_ctx.quadd_ctx->param.power_rate_freq == 0) + return; + + if (power_ctx.period > 0) + del_timer_sync(&power_ctx.timer); + + s = &power_ctx.gpu; + if (atomic_cmpxchg(&s->active, 1, 0)) { +#ifdef CONFIG_COMMON_CLK + if (s->clkp) + clk_notifier_unregister(s->clkp, &s->nb); +#endif + } + + s = &power_ctx.emc; + if (atomic_cmpxchg(&s->active, 1, 0)) { +#ifdef CONFIG_COMMON_CLK + if (s->clkp) + clk_notifier_unregister(s->clkp, &s->nb); +#endif + } + + s = &power_ctx.cpu; + if (atomic_cmpxchg(&s->active, 1, 0)) { + pr_info("power_clk: stop\n"); + unregister_cpu_notifier(&s->nb); + } +} + +int quadd_power_clk_init(struct quadd_ctx *quadd_ctx) +{ + pr_info("power_clk: init\n"); + + init_source(&power_ctx.cpu, cpu_notifier_call, nr_cpu_ids, + QUADD_POWER_CLK_CPU); + init_source(&power_ctx.gpu, gpu_notifier_call, 1, QUADD_POWER_CLK_GPU); + init_source(&power_ctx.emc, emc_notifier_call, 1, 
QUADD_POWER_CLK_EMC); + + power_ctx.quadd_ctx = quadd_ctx; + + return 0; +} + +void quadd_power_clk_deinit(void) +{ + pr_info("power_clk: deinit\n"); + quadd_power_clk_stop(); +} diff --git a/drivers/misc/tegra-profiler/power_clk.h b/drivers/misc/tegra-profiler/power_clk.h new file mode 100644 index 000000000000..6854687392f3 --- /dev/null +++ b/drivers/misc/tegra-profiler/power_clk.h @@ -0,0 +1,30 @@ +/* + * drivers/misc/tegra-profiler/power_clk.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __QUADD_POWER_CLK_H +#define __QUADD_POWER_CLK_H + +struct quadd_ctx; + +void quadd_power_clk_deinit(void); +int quadd_power_clk_init(struct quadd_ctx *quadd_ctx); + +int quadd_power_clk_start(void); +void quadd_power_clk_stop(void); + +int quadd_power_clk_is_enabled(int *period); + +#endif /* __QUADD_POWER_CLK_H */ diff --git a/drivers/misc/tegra-profiler/quadd.h b/drivers/misc/tegra-profiler/quadd.h new file mode 100644 index 000000000000..743482dd33b4 --- /dev/null +++ b/drivers/misc/tegra-profiler/quadd.h @@ -0,0 +1,62 @@ +/* + * drivers/misc/tegra-profiler/quadd.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __QUADD_H +#define __QUADD_H + +#include <linux/tegra_profiler.h> + +/* #define QUADD_USE_EMULATE_COUNTERS 1 */ + +struct event_data; +struct quadd_comm_data_interface; +struct quadd_hrt_ctx; +struct quadd_mmap_ctx; + +struct quadd_event_source_interface { + int (*enable)(void); + void (*disable)(void); + void (*start)(void); + void (*stop)(void); + int (*read)(struct event_data *events); + int (*set_events)(int *events, int size); + int (*get_supported_events)(int *events); +}; + +struct source_info { + int supported_events[QUADD_MAX_COUNTERS]; + int nr_supported_events; + + int active; +}; + +struct quadd_ctx { + struct quadd_parameters param; + + struct quadd_event_source_interface *pmu; + struct source_info pmu_info; + + struct quadd_event_source_interface *pl310; + struct source_info pl310_info; + + struct quadd_comm_data_interface *comm; + struct quadd_hrt_ctx *hrt; + struct quadd_mmap_ctx *mmap; + + atomic_t started; +}; + +#endif /* __QUADD_H */ diff --git a/drivers/misc/tegra-profiler/tegra.h b/drivers/misc/tegra-profiler/tegra.h new file mode 100644 index 000000000000..013c5abd644f --- /dev/null +++ b/drivers/misc/tegra-profiler/tegra.h @@ -0,0 +1,48 @@ +/* + * drivers/misc/tegra-profiler/tegra.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + */ + +#ifndef __QUADD_TEGRA_H +#define __QUADD_TEGRA_H + +#include <linux/smp.h> + +#ifdef CONFIG_TEGRA_CLUSTER_CONTROL +#include <linux/io.h> +#include <../../mach-tegra/pm.h> +#endif + +static inline int quadd_get_processor_id(void) +{ + int cpu_id = smp_processor_id(); + +#ifdef CONFIG_TEGRA_CLUSTER_CONTROL + if (is_lp_cluster()) + cpu_id |= QM_TEGRA_POWER_CLUSTER_LP; +#endif + + return cpu_id; +} + +static inline int quadd_is_cpu_with_lp_cluster(void) +{ +#ifdef CONFIG_TEGRA_CLUSTER_CONTROL + return 1; +#else + return 0; +#endif +} + +#endif /* __QUADD_TEGRA_H */ diff --git a/drivers/misc/tegra-profiler/version.h b/drivers/misc/tegra-profiler/version.h new file mode 100644 index 000000000000..b5cf277a24b3 --- /dev/null +++ b/drivers/misc/tegra-profiler/version.h @@ -0,0 +1,23 @@ +/* + * drivers/misc/tegra-profiler/version.h + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __QUADD_VERSION_H +#define __QUADD_VERSION_H + +#define QUADD_MODULE_VERSION "1.23" +#define QUADD_MODULE_BRANCH "Dev" + +#endif /* __QUADD_VERSION_H */ |