From 0793a61d4df8daeac6492dbf8d2f3e5713caae5e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Dec 2008 20:12:29 +0100 Subject: performance counters: core code Implement the core kernel bits of Performance Counters subsystem. The Linux Performance Counter subsystem provides an abstraction of performance counter hardware capabilities. It provides per task and per CPU counters, and it provides event capabilities on top of those. Performance counters are accessed via special file descriptors. There's one file descriptor per virtual counter used. The special file descriptor is opened via the perf_counter_open() system call: int perf_counter_open(u32 hw_event_type, u32 hw_event_period, u32 record_type, pid_t pid, int cpu); The syscall returns the new fd. The fd can be used via the normal VFS system calls: read() can be used to read the counter, fcntl() can be used to set the blocking mode, etc. Multiple counters can be kept open at a time, and the counters can be poll()ed. See more details in Documentation/perf-counters.txt. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 171 +++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 9 +++ include/linux/syscalls.h | 6 ++ 3 files changed, 186 insertions(+) create mode 100644 include/linux/perf_counter.h (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h new file mode 100644 index 000000000000..22c4469abf44 --- /dev/null +++ b/include/linux/perf_counter.h @@ -0,0 +1,171 @@ +/* + * Performance counters: + * + * Copyright(C) 2008, Thomas Gleixner + * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar + * + * Data type definitions, declarations, prototypes. + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_PERF_COUNTER_H +#define _LINUX_PERF_COUNTER_H + +#include + +#include +#include +#include +#include +#include + +struct task_struct; + +/* + * Generalized hardware event types, used by the hw_event_type parameter + * of the sys_perf_counter_open() syscall: + */ +enum hw_event_types { + PERF_COUNT_CYCLES, + PERF_COUNT_INSTRUCTIONS, + PERF_COUNT_CACHE_REFERENCES, + PERF_COUNT_CACHE_MISSES, + PERF_COUNT_BRANCH_INSTRUCTIONS, + PERF_COUNT_BRANCH_MISSES, + /* + * If this bit is set in the type, then trigger NMI sampling: + */ + PERF_COUNT_NMI = (1 << 30), +}; + +/* + * IRQ-notification data record type: + */ +enum perf_record_type { + PERF_RECORD_SIMPLE, + PERF_RECORD_IRQ, + PERF_RECORD_GROUP, +}; + +/** + * struct hw_perf_counter - performance counter hardware details + */ +struct hw_perf_counter { + u64 config; + unsigned long config_base; + unsigned long counter_base; + int nmi; + unsigned int idx; + u64 prev_count; + s32 next_count; + u64 irq_period; +}; + +/* + * Hardcoded buffer length limit for now, for IRQ-fed events: + */ +#define PERF_DATA_BUFLEN 2048 + +/** + * struct perf_data - performance counter IRQ data sampling ... + */ +struct perf_data { + int len; + int rd_idx; + int overrun; + u8 data[PERF_DATA_BUFLEN]; +}; + +/** + * struct perf_counter - performance counter kernel representation: + */ +struct perf_counter { + struct list_head list; + int active; +#if BITS_PER_LONG == 64 + atomic64_t count; +#else + atomic_t count32[2]; +#endif + u64 __irq_period; + + struct hw_perf_counter hw; + + struct perf_counter_context *ctx; + struct task_struct *task; + + /* + * Protect attach/detach: + */ + struct mutex mutex; + + int oncpu; + int cpu; + + s32 hw_event_type; + enum perf_record_type record_type; + + /* read() / irq related data */ + wait_queue_head_t waitq; + /* optional: for NMIs */ + int wakeup_pending; + struct perf_data *irqdata; + struct perf_data *usrdata; + struct perf_data data[2]; +}; + +/** + * struct perf_counter_context - counter context structure + * + * Used as a container for task counters and CPU counters as well: + */ +struct perf_counter_context { +#ifdef CONFIG_PERF_COUNTERS + /* + * Protect the list of counters: + */ + spinlock_t lock; + struct list_head counters; + int nr_counters; + int nr_active; + struct task_struct *task; +#endif +}; + +/** + * struct perf_counter_cpu_context - per cpu counter context structure + */ +struct perf_cpu_context { + struct perf_counter_context ctx; + struct perf_counter_context *task_ctx; + int active_oncpu; + int max_pertask; +}; + +/* + * Set by architecture code: + */ +extern int perf_max_counters; + +#ifdef CONFIG_PERF_COUNTERS +extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); +extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); +extern void perf_counter_task_tick(struct task_struct *task, int cpu); +extern void perf_counter_init_task(struct task_struct *task); +extern void perf_counter_notify(struct pt_regs *regs); +extern void perf_counter_print_debug(void); +#else +static inline void +perf_counter_task_sched_in(struct task_struct *task, int cpu) { } +static inline void +perf_counter_task_sched_out(struct task_struct *task, int cpu) { } +static inline void +perf_counter_task_tick(struct task_struct *task, int cpu) { } +static inline void perf_counter_init_task(struct task_struct *task) { } +static inline void perf_counter_notify(struct pt_regs *regs) { } +static inline void perf_counter_print_debug(void) { } +#endif + +#endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 55e30d114477..4c530278391b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,6 +71,7 @@ struct sched_param { #include #include #include +#include #include #include #include @@ -1326,6 +1327,7 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif + struct perf_counter_context perf_counter_ctx; #ifdef CONFIG_NUMA struct mempolicy *mempolicy; short il_next; @@ -2285,6 +2287,13 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif +/* + * Call the function if the target task is executing on a CPU right now: + */ +extern void task_oncpu_function_call(struct task_struct *p, + void (*func) (void *info), void *info); + + #ifdef CONFIG_MM_OWNER extern void mm_update_next_owner(struct mm_struct *mm); extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 04fb47bfb920..6cce728a6263 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -624,4 +624,10 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); +asmlinkage int +sys_perf_counter_open(u32 hw_event_type, + u32 hw_event_period, + u32 record_type, + pid_t pid, + int cpu); #endif -- cgit v1.2.3 From 4ac13294e44664bb7edf4daf52edb71e7c6bbe84 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Dec 2008 21:43:39 +0100 Subject: perf counters: protect them against CSTATE transitions Impact: fix rare lost events problem There are CPUs whose performance counters misbehave on CSTATE transitions, so provide a way to just disable/enable them around deep idle methods. (hw_perf_enable_all() is cheap on x86.) Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 22c4469abf44..5031b5614f25 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -156,6 +156,8 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *task); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); +extern void hw_perf_restore_ctrl(u64 ctrl); +extern u64 hw_perf_disable_all(void); #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -166,6 +168,8 @@ perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *task) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } +static inline void hw_perf_restore_ctrl(u64 ctrl) { } +static inline u64 hw_perf_disable_all(void) { return 0; } #endif #endif /* _LINUX_PERF_COUNTER_H */ -- cgit v1.2.3 From eab656ae04b9d3b83265e3db01c0d2c46b748ef7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 8 Dec 2008 19:26:59 +0100 Subject: perf counters: clean up 'raw' type API Impact: cleanup Introduce a separate hw_event type. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 +++++++ include/linux/syscalls.h | 8 +++----- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5031b5614f25..daedd7d87c2a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -38,6 +38,7 @@ enum hw_event_types { * If this bit is set in the type, then trigger NMI sampling: */ PERF_COUNT_NMI = (1 << 30), + PERF_COUNT_RAW = (1 << 31), }; /* @@ -49,6 +50,12 @@ enum perf_record_type { PERF_RECORD_GROUP, }; +struct perf_counter_event { + u32 hw_event_type; + u32 hw_event_period; + u64 hw_raw_ctrl; +}; + /** * struct hw_perf_counter - performance counter hardware details */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6cce728a6263..3ecd73d03daa 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,6 +54,7 @@ struct compat_stat; struct compat_timeval; struct robust_list_head; struct getcpu_cache; +struct perf_counter_event; #include #include @@ -625,9 +626,6 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage int -sys_perf_counter_open(u32 hw_event_type, - u32 hw_event_period, - u32 record_type, - pid_t pid, - int cpu); +sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, + pid_t pid, int cpu, int masterfd); #endif -- cgit v1.2.3 From dfa7c899b401d7dc5d85aca416aee64ac82812f2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 8 Dec 2008 19:35:37 +0100 Subject: perf counters: expand use of counter->event Impact: change syscall, cleanup Make use of the new perf_counters event type. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index daedd7d87c2a..1f0017673e77 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -96,8 +96,7 @@ struct perf_counter { #else atomic_t count32[2]; #endif - u64 __irq_period; - + struct perf_counter_event event; struct hw_perf_counter hw; struct perf_counter_context *ctx; @@ -111,7 +110,6 @@ struct perf_counter { int oncpu; int cpu; - s32 hw_event_type; enum perf_record_type record_type; /* read() / irq related data */ -- cgit v1.2.3 From 9f66a3810fe0d4100972db84290f3ae4a4d77025 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 10 Dec 2008 12:33:23 +0100 Subject: perf counters: restructure the API Impact: clean up new API Thorough cleanup of the new perf counters API, we now get clean separation of the various concepts: - introduce perf_counter_hw_event to separate out the event source details - move special type flags into separate attributes: PERF_COUNT_NMI, PERF_COUNT_RAW - extend the type to u64 and reserve it fully to the architecture in the raw type case. And make use of all these changes in the core and x86 perfcounters code. Also change the syscall signature to: asmlinkage int sys_perf_counter_open( struct perf_counter_hw_event *hw_event_uptr __user, pid_t pid, int cpu, int group_fd); ( Note that group_fd is unused for now - it's reserved for the counter groups abstraction. ) Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 98 ++++++++++++++++++++++++++++---------------- include/linux/syscalls.h | 12 ++++-- 2 files changed, 70 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1f0017673e77..a2b4852e2d70 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -24,65 +24,93 @@ struct task_struct; /* - * Generalized hardware event types, used by the hw_event_type parameter - * of the sys_perf_counter_open() syscall: + * User-space ABI bits: + */ + +/* + * Generalized performance counter event types, used by the hw_event.type + * parameter of the sys_perf_counter_open() syscall: */ enum hw_event_types { - PERF_COUNT_CYCLES, - PERF_COUNT_INSTRUCTIONS, - PERF_COUNT_CACHE_REFERENCES, - PERF_COUNT_CACHE_MISSES, - PERF_COUNT_BRANCH_INSTRUCTIONS, - PERF_COUNT_BRANCH_MISSES, /* - * If this bit is set in the type, then trigger NMI sampling: + * Common hardware events, generalized by the kernel: */ - PERF_COUNT_NMI = (1 << 30), - PERF_COUNT_RAW = (1 << 31), + PERF_COUNT_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + + /* + * Special "software" counters provided by the kernel, even if + * the hardware does not support performance counters. These + * counters measure various physical and sw events of the + * kernel (and allow the profiling of them as well): + */ + PERF_COUNT_CPU_CLOCK = -1, + PERF_COUNT_TASK_CLOCK = -2, + PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, }; /* * IRQ-notification data record type: */ -enum perf_record_type { - PERF_RECORD_SIMPLE, - PERF_RECORD_IRQ, - PERF_RECORD_GROUP, +enum perf_counter_record_type { + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, }; -struct perf_counter_event { - u32 hw_event_type; - u32 hw_event_period; - u64 hw_raw_ctrl; +/* + * Hardware event to monitor via a performance monitoring counter: + */ +struct perf_counter_hw_event { + u64 type; + + u64 irq_period; + u32 record_type; + + u32 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + __reserved_1 : 29; + + u64 __reserved_2; }; +/* + * Kernel-internal data types: + */ + /** - * struct hw_perf_counter - performance counter hardware details + * struct hw_perf_counter - performance counter hardware details: */ struct hw_perf_counter { - u64 config; - unsigned long config_base; - unsigned long counter_base; - int nmi; - unsigned int idx; - u64 prev_count; - s32 next_count; - u64 irq_period; + u64 config; + unsigned long config_base; + unsigned long counter_base; + int nmi; + unsigned int idx; + u64 prev_count; + u64 irq_period; + s32 next_count; }; /* * Hardcoded buffer length limit for now, for IRQ-fed events: */ -#define PERF_DATA_BUFLEN 2048 +#define PERF_DATA_BUFLEN 2048 /** * struct perf_data - performance counter IRQ data sampling ... */ struct perf_data { - int len; - int rd_idx; - int overrun; - u8 data[PERF_DATA_BUFLEN]; + int len; + int rd_idx; + int overrun; + u8 data[PERF_DATA_BUFLEN]; }; /** @@ -96,7 +124,7 @@ struct perf_counter { #else atomic_t count32[2]; #endif - struct perf_counter_event event; + struct perf_counter_hw_event hw_event; struct hw_perf_counter hw; struct perf_counter_context *ctx; @@ -110,8 +138,6 @@ struct perf_counter { int oncpu; int cpu; - enum perf_record_type record_type; - /* read() / irq related data */ wait_queue_head_t waitq; /* optional: for NMIs */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3ecd73d03daa..a549678b7c3c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,7 +54,7 @@ struct compat_stat; struct compat_timeval; struct robust_list_head; struct getcpu_cache; -struct perf_counter_event; +struct perf_counter_hw_event; #include #include @@ -625,7 +625,11 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -asmlinkage int -sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, - pid_t pid, int cpu, int masterfd); + +asmlinkage int sys_perf_counter_open( + + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd); #endif -- cgit v1.2.3 From 04289bb9891882202d7e961c4c04d2376930e9f9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 08:38:42 +0100 Subject: perf counters: add support for group counters Impact: add group counters This patch adds the "counter groups" abstraction. Groups of counters behave much like normal 'single' counters, with a few semantic and behavioral extensions on top of that. A counter group is created by creating a new counter with the open() syscall's group-leader group_fd file descriptor parameter pointing to another, already existing counter. Groups of counters are scheduled in and out in one atomic group, and they are also roundrobin-scheduled atomically. Counters that are member of a group can also record events with an (atomic) extended timestamp that extends to all members of the group, if the record type is set to PERF_RECORD_GROUP. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a2b4852e2d70..7af7d8965460 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -117,7 +117,10 @@ struct perf_data { * struct perf_counter - performance counter kernel representation: */ struct perf_counter { - struct list_head list; + struct list_head list_entry; + struct list_head sibling_list; + struct perf_counter *group_leader; + int active; #if BITS_PER_LONG == 64 atomic64_t count; @@ -158,7 +161,8 @@ struct perf_counter_context { * Protect the list of counters: */ spinlock_t lock; - struct list_head counters; + + struct list_head counter_list; int nr_counters; int nr_active; struct task_struct *task; -- cgit v1.2.3 From 621a01eac89b5e2f81a4cf576568b31f40a02724 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 12:46:46 +0100 Subject: perf counters: hw driver API Impact: restructure code, introduce hw_ops driver abstraction Introduce this abstraction to handle counter details: struct hw_perf_counter_ops { void (*hw_perf_counter_enable) (struct perf_counter *counter); void (*hw_perf_counter_disable) (struct perf_counter *counter); void (*hw_perf_counter_read) (struct perf_counter *counter); }; This will be useful to support assymetric hw details, and it will also be useful to implement "software counters". (Counters that count kernel managed sw events such as pagefaults, context-switches, wall-clock time or task-local time.) Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7af7d8965460..27385641ecb6 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -113,6 +113,17 @@ struct perf_data { u8 data[PERF_DATA_BUFLEN]; }; +struct perf_counter; + +/** + * struct hw_perf_counter_ops - performance counter hw ops + */ +struct hw_perf_counter_ops { + void (*hw_perf_counter_enable) (struct perf_counter *counter); + void (*hw_perf_counter_disable) (struct perf_counter *counter); + void (*hw_perf_counter_read) (struct perf_counter *counter); +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -120,6 +131,7 @@ struct perf_counter { struct list_head list_entry; struct list_head sibling_list; struct perf_counter *group_leader; + struct hw_perf_counter_ops *hw_ops; int active; #if BITS_PER_LONG == 64 @@ -185,6 +197,9 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS +extern struct hw_perf_counter_ops * +hw_perf_counter_init(struct perf_counter *counter); + extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); -- cgit v1.2.3 From 5c92d12411dfe5f0f3d1b1c1e2f756245e6f7249 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 13:21:10 +0100 Subject: perf counters: implement PERF_COUNT_CPU_CLOCK Impact: add new perf-counter type The 'CPU clock' counter counts the amount of CPU clock time that is elapsing, in nanoseconds. (regardless of how much of it the task is spending on a CPU executing) This counter type is a Linux kernel based abstraction, it is available even if the hardware does not support native hardware performance counters. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 27385641ecb6..9a1713a1be27 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -131,7 +131,7 @@ struct perf_counter { struct list_head list_entry; struct list_head sibling_list; struct perf_counter *group_leader; - struct hw_perf_counter_ops *hw_ops; + const struct hw_perf_counter_ops *hw_ops; int active; #if BITS_PER_LONG == 64 @@ -197,7 +197,7 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS -extern struct hw_perf_counter_ops * +extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); @@ -208,6 +208,9 @@ extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); extern void hw_perf_restore_ctrl(u64 ctrl); extern u64 hw_perf_disable_all(void); +extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); +extern u64 atomic64_counter_read(struct perf_counter *counter); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -219,7 +222,7 @@ static inline void perf_counter_init_task(struct task_struct *task) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void hw_perf_restore_ctrl(u64 ctrl) { } -static inline u64 hw_perf_disable_all(void) { return 0; } +static inline u64 hw_perf_disable_all(void) { return 0; } #endif #endif /* _LINUX_PERF_COUNTER_H */ -- cgit v1.2.3 From 01b2838c4298c5e0d30b4993c195ac34dd9df61e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 13:45:51 +0100 Subject: perf counters: consolidate hw_perf save/restore APIs Impact: cleanup Rename them to better match up the usual IRQ disable/enable APIs: hw_perf_disable_all() => hw_perf_save_disable() hw_perf_restore_ctrl() => hw_perf_restore() Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 9a1713a1be27..68f6e3ad531f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -67,7 +67,7 @@ enum perf_counter_record_type { * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - u64 type; + s64 type; u64 irq_period; u32 record_type; @@ -206,8 +206,8 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *task); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); -extern void hw_perf_restore_ctrl(u64 ctrl); -extern u64 hw_perf_disable_all(void); +extern u64 hw_perf_save_disable(void); +extern void hw_perf_restore(u64 ctrl); extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); extern u64 atomic64_counter_read(struct perf_counter *counter); @@ -221,8 +221,8 @@ perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *task) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } -static inline void hw_perf_restore_ctrl(u64 ctrl) { } -static inline u64 hw_perf_disable_all(void) { return 0; } +static inline void hw_perf_restore(u64 ctrl) { } +static inline u64 hw_perf_save_disable(void) { return 0; } #endif #endif /* _LINUX_PERF_COUNTER_H */ -- cgit v1.2.3 From bae43c9945ebeef15e7952e317efb02393d3bfc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 14:03:20 +0100 Subject: perf counters: implement PERF_COUNT_TASK_CLOCK Impact: add new perf-counter type The 'task clock' counter counts the amount of time a task is executing, in nanoseconds. It stops ticking when a task is scheduled out either due to it blocking, sleeping or it being preempted. This counter type is a Linux kernel based abstraction, it is available even if the hardware does not support native hardware performance counters. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 68f6e3ad531f..30c0ec8c1ee3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -50,8 +50,11 @@ enum hw_event_types { */ PERF_COUNT_CPU_CLOCK = -1, PERF_COUNT_TASK_CLOCK = -2, - PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, + /* + * Future software events: + */ + /* PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, */ }; /* -- cgit v1.2.3 From 1d1c7ddbfab358445a542715551301b7fc363e28 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 14:59:31 +0100 Subject: perf counters: add prctl interface to disable/enable counters Add a way for self-monitoring tasks to disable/enable counters summarily, via a prctl: PR_TASK_PERF_COUNTERS_DISABLE 31 PR_TASK_PERF_COUNTERS_ENABLE 32 Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++++ include/linux/prctl.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 30c0ec8c1ee3..97d86c293ee8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -213,6 +213,8 @@ extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); extern u64 atomic64_counter_read(struct perf_counter *counter); +extern int perf_counter_task_disable(void); +extern int perf_counter_task_enable(void); #else static inline void @@ -226,6 +228,8 @@ static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void hw_perf_restore(u64 ctrl) { } static inline u64 hw_perf_save_disable(void) { return 0; } +static inline int perf_counter_task_disable(void) { return -EINVAL; } +static inline int perf_counter_task_enable(void) { return -EINVAL; } #endif #endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 48d887e3c6e7..b00df4c79c63 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -85,4 +85,7 @@ #define PR_SET_TIMERSLACK 29 #define PR_GET_TIMERSLACK 30 +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 6a930700c8b655a9e25e42fc4adc0b225ebbcefc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 15:17:03 +0100 Subject: perf counters: clean up state transitions Impact: cleanup Introduce a proper enum for the 3 states of a counter: PERF_COUNTER_STATE_OFF = -1 PERF_COUNTER_STATE_INACTIVE = 0 PERF_COUNTER_STATE_ACTIVE = 1 and rename counter->active to counter->state and propagate the changes everywhere. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 97d86c293ee8..8cb095fa442c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -127,6 +127,15 @@ struct hw_perf_counter_ops { void (*hw_perf_counter_read) (struct perf_counter *counter); }; +/** + * enum perf_counter_active_state - the states of a counter + */ +enum perf_counter_active_state { + PERF_COUNTER_STATE_OFF = -1, + PERF_COUNTER_STATE_INACTIVE = 0, + PERF_COUNTER_STATE_ACTIVE = 1, +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -136,7 +145,7 @@ struct perf_counter { struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; - int active; + enum perf_counter_active_state state; #if BITS_PER_LONG == 64 atomic64_t count; #else -- cgit v1.2.3 From ee06094f8279e1312fc0a31591320cc7b6f0ab1e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 13 Dec 2008 09:00:03 +0100 Subject: perfcounters: restructure x86 counter math Impact: restructure code Change counter math from absolute values to clear delta logic. We try to extract elapsed deltas from the raw hw counter - and put that into the generic counter. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8cb095fa442c..72460289c654 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -91,14 +91,16 @@ struct perf_counter_hw_event { * struct hw_perf_counter - performance counter hardware details: */ struct hw_perf_counter { +#ifdef CONFIG_PERF_COUNTERS u64 config; unsigned long config_base; unsigned long counter_base; int nmi; unsigned int idx; - u64 prev_count; + atomic64_t prev_count; u64 irq_period; - s32 next_count; + atomic64_t period_left; +#endif }; /* @@ -140,17 +142,15 @@ enum perf_counter_active_state { * struct perf_counter - performance counter kernel representation: */ struct perf_counter { +#ifdef CONFIG_PERF_COUNTERS struct list_head list_entry; struct list_head sibling_list; struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; enum perf_counter_active_state state; -#if BITS_PER_LONG == 64 atomic64_t count; -#else - atomic_t count32[2]; -#endif + struct perf_counter_hw_event hw_event; struct hw_perf_counter hw; @@ -172,6 +172,7 @@ struct perf_counter { struct perf_data *irqdata; struct perf_data *usrdata; struct perf_data data[2]; +#endif }; /** @@ -220,8 +221,6 @@ extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); -extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); -extern u64 atomic64_counter_read(struct perf_counter *counter); extern int perf_counter_task_disable(void); extern int perf_counter_task_enable(void); -- cgit v1.2.3 From 9b51f66dcb09ac5eb6bc68fc111d5c7a1e0131d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 12 Dec 2008 13:49:45 +0100 Subject: perfcounters: implement "counter inheritance" Impact: implement new performance feature Counter inheritance can be used to run performance counters in a workload, transparently - and pipe back the counter results to the parent counter. Inheritance for performance counters works the following way: when creating a counter it can be marked with the .inherit=1 flag. Such counters are then 'inherited' by all child tasks (be they fork()-ed or clone()-ed). These counters get inherited through exec() boundaries as well (except through setuid boundaries). The counter values get added back to the parent counter(s) when the child task(s) exit - much like stime/utime statistics are gathered. So inherited counters are ideal to gather summary statistics about an application's behavior via shell commands, without having to modify that application. The timec.c command utilizes counter inheritance: http://redhat.com/~mingo/perfcounters/timec.c Sample output: $ ./timec -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null Performance counter stats for 'ls': 163516953 instructions 2295 cache-misses 2855182 branch-misses Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 72460289c654..e5d25bf8f74e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -75,10 +75,11 @@ struct perf_counter_hw_event { u64 irq_period; u32 record_type; - u32 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ - __reserved_1 : 29; + u32 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + inherit : 1, /* children inherit it */ + __reserved_1 : 28; u64 __reserved_2; }; @@ -138,6 +139,8 @@ enum perf_counter_active_state { PERF_COUNTER_STATE_ACTIVE = 1, }; +struct file; + /** * struct perf_counter - performance counter kernel representation: */ @@ -156,7 +159,10 @@ struct perf_counter { struct perf_counter_context *ctx; struct task_struct *task; + struct file *filp; + unsigned int nr_inherited; + struct perf_counter *parent; /* * Protect attach/detach: */ @@ -210,13 +216,16 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS +extern void +perf_counter_show(struct perf_counter *counter, char *str, int trace); extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); -extern void perf_counter_init_task(struct task_struct *task); +extern void perf_counter_init_task(struct task_struct *child); +extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); extern u64 hw_perf_save_disable(void); @@ -226,12 +235,15 @@ extern int perf_counter_task_enable(void); #else static inline void +perf_counter_show(struct perf_counter *counter, char *str, int trace) { } +static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } static inline void perf_counter_task_sched_out(struct task_struct *task, int cpu) { } static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline void perf_counter_init_task(struct task_struct *task) { } +static inline void perf_counter_init_task(struct task_struct *child) { } +static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void hw_perf_restore(u64 ctrl) { } -- cgit v1.2.3 From 5d6a27d8a096868ae313f71f563b06074a7e34fe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 12:28:33 +0100 Subject: perfcounters: add context switch counter Impact: add new feature, new sw counter Add a counter that counts the number of context-switches a task is doing. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e5d25bf8f74e..d2a16563415f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -53,8 +53,8 @@ enum hw_event_types { /* * Future software events: */ - /* PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, */ + PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, }; /* -- cgit v1.2.3 From 6c594c21fcb02c662f11c97be4d7d2b73060a205 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 12:34:15 +0100 Subject: perfcounters: add task migrations counter Impact: add new feature, new sw counter Add a counter that counts the number of cross-CPU migrations a task is suffering. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 8 +++++--- include/linux/sched.h | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d2a16563415f..f30486fc55d7 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -42,6 +42,8 @@ enum hw_event_types { PERF_COUNT_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_BRANCH_MISSES = 5, + PERF_HW_EVENTS_MAX = 6, + /* * Special "software" counters provided by the kernel, even if * the hardware does not support performance counters. These @@ -50,11 +52,11 @@ enum hw_event_types { */ PERF_COUNT_CPU_CLOCK = -1, PERF_COUNT_TASK_CLOCK = -2, - /* - * Future software events: - */ PERF_COUNT_PAGE_FAULTS = -3, PERF_COUNT_CONTEXT_SWITCHES = -4, + PERF_COUNT_CPU_MIGRATIONS = -5, + + PERF_SW_EVENTS_MIN = -6, }; /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 4c530278391b..2e15be8fc792 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1014,6 +1014,8 @@ struct sched_entity { u64 last_wakeup; u64 avg_overlap; + u64 nr_migrations; + #ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; @@ -1029,7 +1031,6 @@ struct sched_entity { u64 exec_max; u64 slice_max; - u64 nr_migrations; u64 nr_migrations_cold; u64 nr_failed_migrations_affine; u64 nr_failed_migrations_running; -- cgit v1.2.3 From 8fb9331391af95ca1f4e5c0a0da8120b13cbae01 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Dec 2008 12:04:16 +0100 Subject: perfcounters: remove warnings Impact: remove debug checks Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f30486fc55d7..d038450de87d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -218,8 +218,6 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS -extern void -perf_counter_show(struct perf_counter *counter, char *str, int trace); extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); @@ -237,8 +235,6 @@ extern int perf_counter_task_enable(void); #else static inline void -perf_counter_show(struct perf_counter *counter, char *str, int trace) { } -static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } static inline void perf_counter_task_sched_out(struct task_struct *task, int cpu) { } -- cgit v1.2.3 From eb2b861810d4ff72454c83996b891df4e0aaff9a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 09:09:13 +0100 Subject: x86, perfcounters: prepare for fixed-mode PMCs Impact: refactor the x86 code for fixed-mode PMCs Extend the data structures and rename the existing facilities to allow for a 'generic' versus 'fixed' counter distinction. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d038450de87d..984da540224b 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -14,6 +14,7 @@ #define _LINUX_PERF_COUNTER_H #include +#include #include #include -- cgit v1.2.3 From 7671581f1666ef4b54a1c1e598c51ac44c060a9b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 14:20:28 +0100 Subject: perfcounters: hw ops rename Impact: rename field names Shorten them. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 984da540224b..48f76d2e54c2 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -128,9 +128,9 @@ struct perf_counter; * struct hw_perf_counter_ops - performance counter hw ops */ struct hw_perf_counter_ops { - void (*hw_perf_counter_enable) (struct perf_counter *counter); - void (*hw_perf_counter_disable) (struct perf_counter *counter); - void (*hw_perf_counter_read) (struct perf_counter *counter); + void (*enable) (struct perf_counter *counter); + void (*disable) (struct perf_counter *counter); + void (*read) (struct perf_counter *counter); }; /** -- cgit v1.2.3 From aa9c4c0f967fdb482ea95e8473ec3d201e6e0781 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 14:10:57 +0100 Subject: perfcounters: fix task clock counter Impact: fix per task clock counter precision Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 4a145caeee07..1b2e3242497c 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -66,7 +66,15 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } + +/* + * Lock/unlock the current runqueue - to extract task statistics: + */ +extern void curr_rq_lock_irq_save(unsigned long *flags); +extern void curr_rq_unlock_irq_restore(unsigned long *flags); +extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update); extern unsigned long long task_delta_exec(struct task_struct *); + extern void account_user_time(struct task_struct *, cputime_t); extern void account_user_time_scaled(struct task_struct *, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t); -- cgit v1.2.3 From eef6cbf5844c620d9db9be99e4908cdf92492fb9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 19 Dec 2008 10:20:42 +0100 Subject: perfcounters: pull inherited counters Change counter inheritance from a 'push' to a 'pull' model: instead of child tasks pushing their final counts to the parent, reuse the wait4 infrastructure to pull counters as child tasks are exit-processed, much like how cutime/cstime is collected. Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 23fd8909b9e5..54fa2fa2c8e4 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -113,6 +113,14 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif +#ifdef CONFIG_PERF_COUNTERS +# define INIT_PERF_COUNTERS(tsk) \ + .perf_counter_ctx.counter_list = \ + LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), +#else +# define INIT_PERF_COUNTERS(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -180,6 +188,7 @@ extern struct group_info init_groups; INIT_IDS \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ + INIT_PERF_COUNTERS(tsk) \ } -- cgit v1.2.3 From 78b6084c907cea15bb40a564b974e072f5163781 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 21 Dec 2008 15:07:49 +0100 Subject: perfcounters: fix init context lock Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 54fa2fa2c8e4..467cff545c30 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -116,7 +116,9 @@ extern struct group_info init_groups; #ifdef CONFIG_PERF_COUNTERS # define INIT_PERF_COUNTERS(tsk) \ .perf_counter_ctx.counter_list = \ - LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), + LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ + .perf_counter_ctx.lock = \ + __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), #else # define INIT_PERF_COUNTERS(tsk) #endif -- cgit v1.2.3 From 95cdd2e7851cce79ab839cb0b3cbe68d7911d0f1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 21 Dec 2008 13:50:42 +0100 Subject: perfcounters: enable lowlevel pmc code to schedule counters Allow lowlevel ->enable() op to return an error if a counter can not be added. This can be used to handle counter constraints. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 48f76d2e54c2..53af11d3767b 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -128,7 +128,7 @@ struct perf_counter; * struct hw_perf_counter_ops - performance counter hw ops */ struct hw_perf_counter_ops { - void (*enable) (struct perf_counter *counter); + int (*enable) (struct perf_counter *counter); void (*disable) (struct perf_counter *counter); void (*read) (struct perf_counter *counter); }; -- cgit v1.2.3 From 8fe91e61cdc407c7556d3cd71cf20141a25bbcea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Dec 2008 12:29:25 +0100 Subject: perfcounters: remove ->nr_inherited Impact: remove dead code nr_inherited was not maintained correctly (not decremented) - and also not used - remove it. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 53af11d3767b..1ea08e9f31ce 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -164,7 +164,6 @@ struct perf_counter { struct task_struct *task; struct file *filp; - unsigned int nr_inherited; struct perf_counter *parent; /* * Protect attach/detach: -- cgit v1.2.3 From f650a672359819454c3d8d4135ecd1558cde0b24 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Dec 2008 12:17:29 +0100 Subject: perfcounters: add PERF_COUNT_BUS_CYCLES Generalize "bus cycles" hw events - and map them to CPU_CLK_Unhalted.Ref on x86. (which is a good enough approximation) Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1ea08e9f31ce..ec77d1643d37 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -36,14 +36,15 @@ enum hw_event_types { /* * Common hardware events, generalized by the kernel: */ - PERF_COUNT_CYCLES = 0, + PERF_COUNT_CPU_CYCLES = 0, PERF_COUNT_INSTRUCTIONS = 1, PERF_COUNT_CACHE_REFERENCES = 2, PERF_COUNT_CACHE_MISSES = 3, PERF_COUNT_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_BRANCH_MISSES = 5, + PERF_COUNT_BUS_CYCLES = 6, - PERF_HW_EVENTS_MAX = 6, + PERF_HW_EVENTS_MAX = 7, /* * Special "software" counters provided by the kernel, even if -- cgit v1.2.3 From e44aef58ecfbe061eb4c53b939bcc35fb1bee82d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 25 Dec 2008 09:02:11 +0100 Subject: perfcounters: include asm/perf_counter.h only if CONFIG_PERF_COUNTERS=y Impact: build fix on ia64 KOSAKI Motohiro reported that -tip doesnt build on ia64 because asm/perf_counter.h only exists on x86 for now. Fix it. Reported-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro Acked-by: KOSAKI Motohiro Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index ec77d1643d37..cc3a75a239a9 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -14,7 +14,10 @@ #define _LINUX_PERF_COUNTER_H #include -#include + +#ifdef CONFIG_PERF_COUNTERS +# include +#endif #include #include -- cgit v1.2.3 From 3cbed429a9ccdb7a243f733b1056fe5c39e9004c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 16:43:42 +1100 Subject: perf_counter: Add optional hw_perf_group_sched_in arch function Impact: extend perf_counter infrastructure This adds an optional hw_perf_group_sched_in() arch function that enables a whole group of counters in one go. It returns 1 if it added the group successfully, 0 if it did nothing (and therefore the core needs to add the counters individually), or a negative number if an error occurred. It should add all the counters and enable any software counters in the group, or else add none of them and return an error. There are a couple of related changes/improvements in the group handling here: * As an optimization, group_sched_out() and group_sched_in() now check the state of the group leader, and do nothing if the leader is not active or disabled. * We now call hw_perf_save_disable/hw_perf_restore around the complete set of counter enable/disable calls in __perf_counter_sched_in/out, to give the arch code the opportunity to defer updating the hardware state until the hw_perf_restore call if it wants. * We no longer stop adding groups after we get to a group that has more than one counter. We will ultimately add an option for a group to be exclusive. The current code doesn't really implement exclusive groups anyway, since a group could end up going on with other counters that get added before it. Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index cc3a75a239a9..b21d1ea4c054 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -236,6 +236,9 @@ extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); extern int perf_counter_task_disable(void); extern int perf_counter_task_enable(void); +extern int hw_perf_group_sched_in(struct perf_counter *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, int cpu); #else static inline void -- cgit v1.2.3 From 3b6f9e5cb21964b7ce12bf81076f830885563ec8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 14 Jan 2009 21:00:30 +1100 Subject: perf_counter: Add support for pinned and exclusive counter groups Impact: New perf_counter features A pinned counter group is one that the user wants to have on the CPU whenever possible, i.e. whenever the associated task is running, for a per-task group, or always for a per-cpu group. If the system cannot satisfy that, it puts the group into an error state where it is not scheduled any more and reads from it return EOF (i.e. 0 bytes read). The group can be released from error state and made readable again using prctl(PR_TASK_PERF_COUNTERS_ENABLE). When we have finer-grained enable/disable controls on counters we'll be able to reset the error state on individual groups. An exclusive group is one that the user wants to be the only group using the CPU performance monitor hardware whenever it is on. The counter group scheduler will not schedule an exclusive group if there are already other groups on the CPU and will not schedule other groups onto the CPU if there is an exclusive group scheduled (that statement does not apply to groups containing only software counters, which can always go on and which do not prevent an exclusive group from going on). With an exclusive group, we will be able to let users program PMU registers at a low level without the concern that those settings will perturb other measurements. Along the way this reorganizes things a little: - is_software_counter() is moved to perf_counter.h. - cpuctx->active_oncpu now records the number of hardware counters on the CPU, i.e. it now excludes software counters. Nothing was reading cpuctx->active_oncpu before, so this change is harmless. - A new cpuctx->exclusive field records whether we currently have an exclusive group on the CPU. - counter_sched_out moves higher up in perf_counter.c and gets called from __perf_counter_remove_from_context and __perf_counter_exit_task, where we used to have essentially the same code. - __perf_counter_sched_in now goes through the counter list twice, doing the pinned counters in the first loop and the non-pinned counters in the second loop, in order to give the pinned counters the best chance to be scheduled in. Note that only a group leader can be exclusive or pinned, and that attribute applies to the whole group. This avoids some awkwardness in some corner cases (e.g. where a group leader is closed and the other group members get added to the context list). If we want to relax that restriction later, we can, and it is easier to relax a restriction than to apply a new one. This doesn't yet handle the case where a pinned counter is inherited and goes into error state in the child - the error state is not propagated up to the parent when the child exits, and arguably it should. Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index b21d1ea4c054..7ab8e5f96f5b 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -86,7 +86,10 @@ struct perf_counter_hw_event { nmi : 1, /* NMI sampling */ raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ - __reserved_1 : 28; + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only counter on PMU */ + + __reserved_1 : 26; u64 __reserved_2; }; @@ -141,6 +144,7 @@ struct hw_perf_counter_ops { * enum perf_counter_active_state - the states of a counter */ enum perf_counter_active_state { + PERF_COUNTER_STATE_ERROR = -2, PERF_COUNTER_STATE_OFF = -1, PERF_COUNTER_STATE_INACTIVE = 0, PERF_COUNTER_STATE_ACTIVE = 1, @@ -214,6 +218,7 @@ struct perf_cpu_context { struct perf_counter_context *task_ctx; int active_oncpu; int max_pertask; + int exclusive; }; /* @@ -240,6 +245,14 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu); +/* + * Return 1 for a software counter, 0 for a hardware counter + */ +static inline int is_software_counter(struct perf_counter *counter) +{ + return !counter->hw_event.raw && counter->hw_event.type < 0; +} + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } -- cgit v1.2.3 From d859e29fe34cb833071b20aef860ee94fbad9bb2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 17 Jan 2009 18:10:22 +1100 Subject: perf_counter: Add counter enable/disable ioctls Impact: New perf_counter features This primarily adds a way for perf_counter users to enable and disable counters and groups. Enabling or disabling a counter or group also enables or disables all of the child counters that have been cloned from it to monitor children of the task monitored by the top-level counter. The userspace interface to enable/disable counters is via ioctl on the counter file descriptor. Along the way this extends the code that handles child counters to handle child counter groups properly. A group with multiple counters will be cloned to child tasks if and only if the group leader has the hw_event.inherit bit set - if it is set the whole group is cloned as a group in the child task. In order to be able to enable or disable all child counters of a given top-level counter, we need a way to find them all. Hence I have added a child_list field to struct perf_counter, which is the head of the list of children for a top-level counter, or the link in that list for a child counter. That list is protected by the perf_counter.mutex field. This also adds a mutex to the perf_counter_context struct. Previously the list of counters was protected just by the lock field in the context, which meant that perf_counter_init_task had to take that lock and then take whatever lock/mutex protects the top-level counter's child_list. But the counter enable/disable functions need to take that lock in order to traverse the list, then for each counter take the lock in that counter's context in order to change the counter's state safely, which will lead to a deadlock. To solve this, we now have both a mutex and a spinlock in the context, and taking either is sufficient to ensure the list of counters can't change - you have to take both before changing the list. Now perf_counter_init_task takes the mutex instead of the lock (which incidentally means that inherit_counter can use GFP_KERNEL instead of GFP_ATOMIC) and thus avoids the possible deadlock. Similarly the new enable/disable functions can take the mutex while traversing the list of child counters without incurring a possible deadlock when the counter manipulation code locks the context for a child counter. We also had an misfeature that the first counter added to a context would possibly not go on until the next sched-in, because we were using ctx->nr_active to detect if the context was running on a CPU. But nr_active is the number of active counters, and if that was zero (because the context didn't have any counters yet) it would look like the context wasn't running on a cpu and so the retry code in __perf_install_in_context wouldn't retry. So this adds an 'is_active' field that is set when the context is on a CPU, even if it has no counters. The is_active field is only used for task contexts, not for per-cpu contexts. If we enable a subsidiary counter in a group that is active on a CPU, and the arch code can't enable the counter, then we have to pull the whole group off the CPU. We do this with group_sched_out, which gets moved up in the file so it comes before all its callers. This also adds similar logic to __perf_install_in_context so that the "all on, or none" invariant of groups is preserved when adding a new counter to a group. Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7ab8e5f96f5b..33ba9fe0a781 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -14,6 +14,7 @@ #define _LINUX_PERF_COUNTER_H #include +#include #ifdef CONFIG_PERF_COUNTERS # include @@ -94,6 +95,12 @@ struct perf_counter_hw_event { u64 __reserved_2; }; +/* + * Ioctls that can be done on a perf counter fd: + */ +#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) + /* * Kernel-internal data types: */ @@ -173,8 +180,10 @@ struct perf_counter { struct file *filp; struct perf_counter *parent; + struct list_head child_list; + /* - * Protect attach/detach: + * Protect attach/detach and child_list: */ struct mutex mutex; @@ -199,13 +208,21 @@ struct perf_counter { struct perf_counter_context { #ifdef CONFIG_PERF_COUNTERS /* - * Protect the list of counters: + * Protect the states of the counters in the list, + * nr_active, and the list: */ spinlock_t lock; + /* + * Protect the list of counters. Locking either mutex or lock + * is sufficient to ensure the list doesn't change; to change + * the list you need to lock both the mutex and the spinlock. + */ + struct mutex mutex; struct list_head counter_list; int nr_counters; int nr_active; + int is_active; struct task_struct *task; #endif }; -- cgit v1.2.3 From 1b023a96d9b44f50f4d8ff28c15f5b80e354760f Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 23 Jan 2009 10:13:01 +0100 Subject: perfcounters: throttle on too high IRQ rates Starting kerneltop with only -c 100 seems to be a bad idea, it can easily lock the system due to perfcounter IRQ overload. So add throttling: if a new IRQ arrives in a shorter than PERFMON_MIN_PERIOD_NS time, turn off perfcounters and untrottle them from the next timer tick. Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 33ba9fe0a781..91f1ca4c01c0 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -254,6 +254,7 @@ extern void perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); +extern void perf_counter_unthrottle(void); extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); extern int perf_counter_task_disable(void); @@ -270,6 +271,8 @@ static inline int is_software_counter(struct perf_counter *counter) return !counter->hw_event.raw && counter->hw_event.type < 0; } +#define PERFMON_MIN_PERIOD_NS 10000 + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -281,6 +284,7 @@ static inline void perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } +static inline void perf_counter_unthrottle(void) { } static inline void hw_perf_restore(u64 ctrl) { } static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } -- cgit v1.2.3 From 4b39fd96855254a244f71245b41a91cdecb87d63 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 23 Jan 2009 14:36:16 +0100 Subject: perfcounters: ratelimit performance counter interrupts Ratelimit performance counter interrupts to 100KHz per CPU. This replaces the irq-delta-time based method. Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 91f1ca4c01c0..f55381fbcac9 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -271,8 +271,6 @@ static inline int is_software_counter(struct perf_counter *counter) return !counter->hw_event.raw && counter->hw_event.type < 0; } -#define PERFMON_MIN_PERIOD_NS 10000 - #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } -- cgit v1.2.3 From 23a185ca8abbeef64b6ffc33059b1d630e43ec10 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 9 Feb 2009 22:42:47 +1100 Subject: perf_counters: make software counters work as per-cpu counters Impact: kernel crash fix Yanmin Zhang reported that using a PERF_COUNT_TASK_CLOCK software counter as a per-cpu counter would reliably crash the system, because it calls __task_delta_exec with a null pointer. The page fault, context switch and cpu migration counters also won't function correctly as per-cpu counters since they reference the current task. This fixes the problem by redirecting the task_clock counter to the cpu_clock counter when used as a per-cpu counter, and by implementing per-cpu page fault, context switch and cpu migration counters. Along the way, this: - Initializes counter->ctx earlier, in perf_counter_alloc, so that sw_perf_counter_init can use it - Adds code to kernel/sched.c to count task migrations into each cpu, in rq->nr_migrations_in - Exports the per-cpu context switch and task migration counts via new functions added to kernel/sched.c - Makes sure that if sw_perf_counter_init fails, we don't try to initialize the counter as a hardware counter. Since the user has passed a negative, non-raw event type, they clearly don't intend for it to be interpreted as a hardware event. Reported-by: "Zhang Yanmin" Signed-off-by: Paul Mackerras Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b85b10abf770..1e5f70062a9c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -137,6 +137,8 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); +extern u64 cpu_nr_switches(int cpu); +extern u64 cpu_nr_migrations(int cpu); struct seq_file; struct cfs_rq; -- cgit v1.2.3 From 0475f9ea8e2cc030298908949e0d5da9f2fc2cfe Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 11 Feb 2009 14:35:35 +1100 Subject: perf_counters: allow users to count user, kernel and/or hypervisor events Impact: new perf_counter feature This extends the perf_counter_hw_event struct with bits that specify that events in user, kernel and/or hypervisor mode should not be counted (i.e. should be excluded), and adds code to program the PMU mode selection bits accordingly on x86 and powerpc. For software counters, we don't currently have the infrastructure to distinguish which mode an event occurs in, so we currently fail the counter initialization if the setting of the hw_event.exclude_* bits would require us to distinguish. Context switches and CPU migrations are currently considered to occur in kernel mode. On x86, this changes the previous policy that only root can count kernel events. Now non-root users can count kernel events or exclude them. Non-root users still can't use NMI events, though. On x86 we don't appear to have any way to control whether hypervisor events are counted or not, so hw_event.exclude_hv is ignored. On powerpc, the selection of whether to count events in user, kernel and/or hypervisor mode is PMU-wide, not per-counter, so this adds a check that the hw_event.exclude_* settings are the same as other events on the PMU. Counters being added to a group have to have the same settings as the other hardware counters in the group. Counters and groups can only be enabled in hw_perf_group_sched_in or power_perf_enable if they have the same settings as any other counters already on the PMU. If we are not running on a hypervisor, the exclude_hv setting is ignored (by forcing it to 0) since we can't ever get any hypervisor events. Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f55381fbcac9..c83f51d6e359 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -83,14 +83,17 @@ struct perf_counter_hw_event { u64 irq_period; u32 record_type; - u32 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only counter on PMU */ - - __reserved_1 : 26; + u32 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + + __reserved_1 : 23; u64 __reserved_2; }; -- cgit v1.2.3 From c07c99b67233ccaad38a961c17405dc1e1542aa4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 13 Feb 2009 22:10:34 +1100 Subject: perfcounters: make context switch and migration software counters work again Jaswinder Singh Rajput reported that commit 23a185ca8abbeef caused the context switch and migration software counters to report zero always. With that commit, the software counters only count events that occur between sched-in and sched-out for a task. This is necessary for the counter enable/disable prctls and ioctls to work. However, the context switch and migration counts are incremented after sched-out for one task and before sched-in for the next. Since the increment doesn't occur while a task is scheduled in (as far as the software counters are concerned) it doesn't count towards any counter. Thus the context switch and migration counters need to count events that occur at any time, provided the counter is enabled, not just those that occur while the task is scheduled in (from the perf_counter subsystem's point of view). The problem though is that the software counter code can't tell the difference between being enabled and being scheduled in, and between being disabled and being scheduled out, since we use the one pair of enable/disable entry points for both. That is, the high-level disable operation simply arranges for the counter to not be scheduled in any more, and the high-level enable operation arranges for it to be scheduled in again. One way to solve this would be to have sched_in/out operations in the hw_perf_counter_ops struct as well as enable/disable. However, this takes a simpler approach: it adds a 'prev_state' field to the perf_counter struct that allows a counter's enable method to know whether the counter was previously disabled or just inactive (scheduled out), and therefore whether the enable method is being called as a result of a high-level enable or a schedule-in operation. This then allows the context switch, migration and page fault counters to reset their hw.prev_count value in their enable functions only if they are called as a result of a high-level enable operation. Although page faults would normally only occur while the counter is scheduled in, this changes the page fault counter code too in case there are ever circumstances where page faults get counted against a task while its counters are not scheduled in. Reported-by: Jaswinder Singh Rajput Signed-off-by: Paul Mackerras Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c83f51d6e359..32cd1acb7386 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -173,6 +173,7 @@ struct perf_counter { const struct hw_perf_counter_ops *hw_ops; enum perf_counter_active_state state; + enum perf_counter_active_state prev_state; atomic64_t count; struct perf_counter_hw_event hw_event; -- cgit v1.2.3 From f3dfd2656deb81a0addee4f4ceff66b50a387388 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 26 Feb 2009 22:43:46 +1100 Subject: perfcounters: fix a few minor cleanliness issues This fixes three issues noticed by Arnd Bergmann: - Add #ifdef __KERNEL__ and move some things around in perf_counter.h to make sure only the bits that userspace needs are exported to userspace. - Use __u64, __s64, __u32 types in the structs exported to userspace rather than u64, s64, u32. - Make the sys_perf_counter_open syscall available to the SPUs on Cell platforms. And one issue that I noticed in looking at the code again: - Wrap the perf_counter_open syscall with SYSCALL_DEFINE4 so we get the proper handling of int arguments on ppc64 (and some other 64-bit architectures). Reported-by: Arnd Bergmann Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 43 +++++++++++++++++++++++-------------------- include/linux/syscalls.h | 9 +++------ 2 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 32cd1acb7386..186efaf49665 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -13,20 +13,8 @@ #ifndef _LINUX_PERF_COUNTER_H #define _LINUX_PERF_COUNTER_H -#include -#include - -#ifdef CONFIG_PERF_COUNTERS -# include -#endif - -#include -#include -#include -#include -#include - -struct task_struct; +#include +#include /* * User-space ABI bits: @@ -78,12 +66,12 @@ enum perf_counter_record_type { * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - s64 type; + __s64 type; - u64 irq_period; - u32 record_type; + __u64 irq_period; + __u32 record_type; - u32 disabled : 1, /* off by default */ + __u32 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ @@ -95,7 +83,7 @@ struct perf_counter_hw_event { __reserved_1 : 23; - u64 __reserved_2; + __u64 __reserved_2; }; /* @@ -104,10 +92,24 @@ struct perf_counter_hw_event { #define PERF_COUNTER_IOC_ENABLE _IO('$', 0) #define PERF_COUNTER_IOC_DISABLE _IO('$', 1) +#ifdef __KERNEL__ /* - * Kernel-internal data types: + * Kernel-internal data types and definitions: */ +#ifdef CONFIG_PERF_COUNTERS +# include +#endif + +#include +#include +#include +#include +#include +#include + +struct task_struct; + /** * struct hw_perf_counter - performance counter hardware details: */ @@ -293,4 +295,5 @@ static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } #endif +#endif /* __KERNEL__ */ #endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 88255d3261a4..28ef2be839c7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -696,10 +696,7 @@ asmlinkage long sys_pipe(int __user *); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -asmlinkage int sys_perf_counter_open( - - struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, - int cpu, - int group_fd); +asmlinkage long sys_perf_counter_open( + const struct perf_counter_hw_event __user *hw_event_uptr, + pid_t pid, int cpu, int group_fd); #endif -- cgit v1.2.3 From 2743a5b0fa6f309da904f2190a9cc25deee34dbd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 4 Mar 2009 20:36:51 +1100 Subject: perfcounters: provide expansion room in the ABI Impact: ABI change This expands several fields in the perf_counter_hw_event struct and adds a "flags" argument to the perf_counter_open system call, in order that features can be added in future without ABI changes. In particular the record_type field is expanded to 64 bits, and the space for flag bits has been expanded from 32 to 64 bits. This also adds some new fields: * read_format (64 bits) is intended to provide a way to specify what userspace wants to get back when it does a read() on a simple (non-interrupting) counter; * exclude_idle (1 bit) provides a way for userspace to ask that events that occur when the cpu is idle be excluded; * extra_config_len will provide a way for userspace to supply an arbitrary amount of extra machine-specific PMU configuration data immediately following the perf_counter_hw_event struct, to allow sophisticated users to program things such as instruction matching CAMs and address range registers; * __reserved_3 and __reserved_4 provide space for future expansion. Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 12 +++++++++--- include/linux/syscalls.h | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 186efaf49665..c42455ab1558 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -69,9 +69,10 @@ struct perf_counter_hw_event { __s64 type; __u64 irq_period; - __u32 record_type; + __u64 record_type; + __u64 read_format; - __u32 disabled : 1, /* off by default */ + __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ @@ -80,10 +81,15 @@ struct perf_counter_hw_event { exclude_user : 1, /* don't count user */ exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ - __reserved_1 : 23; + __reserved_1 : 55; + + __u32 extra_config_len; + __u32 __reserved_4; __u64 __reserved_2; + __u64 __reserved_3; }; /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 28ef2be839c7..ab1d77247395 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -698,5 +698,5 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage long sys_perf_counter_open( const struct perf_counter_hw_event __user *hw_event_uptr, - pid_t pid, int cpu, int group_fd); + pid_t pid, int cpu, int group_fd, unsigned long flags); #endif -- cgit v1.2.3 From 2485e5184452ccbda34ff83883883d9107800dff Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2009 12:33:16 +0100 Subject: perfcounters: fix reserved bits sizing The sum of bits is 65 currently not 64 - so reduce the # of reserved bits from 55 to 54. Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c42455ab1558..dde564517b66 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -83,7 +83,7 @@ struct perf_counter_hw_event { exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - __reserved_1 : 55; + __reserved_1 : 54; __u32 extra_config_len; __u32 __reserved_4; -- cgit v1.2.3 From bac429f037f1a51a74d62bad6d1518c3be065df3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 20 Mar 2009 12:50:56 -0400 Subject: tracing: add function profiler Impact: new profiling feature This patch adds a function profiler. In debugfs/tracing/ two new files are created. function_profile_enabled - to enable or disable profiling trace_stat/functions - the profiled functions. For example: echo 1 > /debugfs/tracing/function_profile_enabled ./hackbench 50 echo 0 > /debugfs/tracing/function_profile_enabled yields: cat /debugfs/tracing/trace_stat/functions Function Hit -------- --- _spin_lock 10106442 _spin_unlock 10097492 kfree 6013704 _spin_unlock_irqrestore 4423941 _spin_lock_irqsave 4406825 __phys_addr 4181686 __slab_free 4038222 dput 4030130 path_put 4023387 unroll_tree_refs 4019532 [...] The most hit functions are listed first. Functions that are not hit are not listed. This feature depends on and uses dynamic function tracing. When the function profiling is disabled, no overhead occurs. But it still takes up around 300KB to hold the data, thus it is not recomended to keep it enabled for systems low on memory. When a '1' is echoed into the function_profile_enabled file, the counters for is function is reset back to zero. Thus you can see what functions are hit most by different programs. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 015a3d22cf74..0456c3a51c66 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -153,6 +153,10 @@ struct dyn_ftrace { unsigned long flags; struct dyn_ftrace *newlist; }; +#ifdef CONFIG_FUNCTION_PROFILER + unsigned long counter; + struct hlist_node node; +#endif struct dyn_arch_ftrace arch; }; -- cgit v1.2.3 From 493762fc534c71d11d489f872c4b4a2c61173668 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 Mar 2009 17:12:36 -0400 Subject: tracing: move function profiler data out of function struct Impact: reduce size of memory in function profiler The function profiler originally introduces its counters into the function records itself. There is 20 thousand different functions on a normal system, and that is adding 20 thousand counters for profiling event when not needed. A normal run of the profiler yields only a couple of thousand functions executed, depending on what is being profiled. This means we have around 18 thousand useless counters. This patch rectifies this by moving the data out of the function records used by dynamic ftrace. Data is preallocated to hold the functions when the profiling begins. Checks are made during profiling to see if more recorcds should be allocated, and they are allocated if it is safe to do so. This also removes the dependency from using dynamic ftrace, and also removes the overhead by having it enabled. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0456c3a51c66..015a3d22cf74 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -153,10 +153,6 @@ struct dyn_ftrace { unsigned long flags; struct dyn_ftrace *newlist; }; -#ifdef CONFIG_FUNCTION_PROFILER - unsigned long counter; - struct hlist_node node; -#endif struct dyn_arch_ftrace arch; }; -- cgit v1.2.3 From a2a16d6a3156ef7309ca7328a20c35df9418e670 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 23:17:58 -0400 Subject: function-graph: add option to calculate graph time or not graph time is the time that a function is executing another function. Thus if function A calls B, if graph-time is set, then the time for A includes B. This is the default behavior. But if graph-time is off, then the time spent executing B is subtracted from A. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 015a3d22cf74..9e0a8d245e55 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -365,6 +365,7 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; + unsigned long long subtime; }; /* @@ -376,8 +377,6 @@ extern void return_to_handler(void); extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); -extern void -ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); /* * Sometimes we don't want to trace a function with the function -- cgit v1.2.3 From 7fd7d83d49914f03aefffba6aee09032fcd54cce Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:24:03 -0800 Subject: x86/pvops: replace arch_enter_lazy_cpu_mode with arch_start_context_switch Impact: simplification, prepare for later changes Make lazy cpu mode more specific to context switching, so that it makes sense to do more context-switch specific things in the callbacks. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra --- include/asm-frv/pgtable.h | 4 ++-- include/asm-generic/pgtable.h | 21 +++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index e16fdb1f4f4f..235e34a7a340 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -73,8 +73,8 @@ static inline int pte_file(pte_t pte) { return 0; } #define pgtable_cache_init() do {} while (0) #define arch_enter_lazy_mmu_mode() do {} while (0) #define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_enter_lazy_cpu_mode() do {} while (0) -#define arch_leave_lazy_cpu_mode() do {} while (0) + +#define arch_start_context_switch() do {} while (0) #else /* !CONFIG_MMU */ /*****************************************************************************/ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 8e6d0ca70aba..922f03671dd8 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #endif /* - * A facility to provide batching of the reload of page tables with the - * actual context switch code for paravirtualized guests. By convention, - * only one of the lazy modes (CPU, MMU) should be active at any given - * time, entry should never be nested, and entry and exits should always - * be paired. This is for sanity of maintaining and reasoning about the - * kernel code. + * A facility to provide batching of the reload of page tables and + * other process state with the actual context switch code for + * paravirtualized guests. By convention, only one of the batched + * update (lazy) modes (CPU, MMU) should be active at any given time, + * entry should never be nested, and entry and exits should always be + * paired. This is for sanity of maintaining and reasoning about the + * kernel code. In this case, the exit (end of the context switch) is + * in architecture-specific code, and so doesn't need a generic + * definition. */ -#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE -#define arch_enter_lazy_cpu_mode() do {} while (0) -#define arch_leave_lazy_cpu_mode() do {} while (0) -#define arch_flush_lazy_cpu_mode() do {} while (0) +#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH +#define arch_start_context_switch() do {} while (0) #endif #ifndef __HAVE_PFNMAP_TRACKING -- cgit v1.2.3 From 224101ed69d3fbb486868e0f6e0f9fa37302efb4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 18 Feb 2009 11:18:57 -0800 Subject: x86/paravirt: finish change from lazy cpu to context switch start/end Impact: fix lazy context switch API Pass the previous and next tasks into the context switch start end calls, so that the called functions can properly access the task state (esp in end_context_switch, in which the next task is not yet completely current). Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra --- include/asm-frv/pgtable.h | 2 +- include/asm-generic/pgtable.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 235e34a7a340..09887045d03f 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -74,7 +74,7 @@ static inline int pte_file(pte_t pte) { return 0; } #define arch_enter_lazy_mmu_mode() do {} while (0) #define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_start_context_switch() do {} while (0) +#define arch_start_context_switch(prev) do {} while (0) #else /* !CONFIG_MMU */ /*****************************************************************************/ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 922f03671dd8..e410f602cab1 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -291,7 +291,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, * definition. */ #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH -#define arch_start_context_switch() do {} while (0) +#define arch_start_context_switch(prev) do {} while (0) #endif #ifndef __HAVE_PFNMAP_TRACKING -- cgit v1.2.3 From d4c045364d3107603187f21a56ec231e74d26441 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 19:20:31 -0800 Subject: xen: add irq_from_evtchn Given an evtchn, return the corresponding irq. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- include/xen/events.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/xen/events.h b/include/xen/events.h index 0d5f1adc0363..e68d59a90ca8 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq); irq will be disabled so it won't deliver an interrupt. */ void xen_poll_irq(int irq); +/* Determine the IRQ which is bound to an event channel */ +unsigned irq_from_evtchn(unsigned int evtchn); + #endif /* _XEN_EVENTS_H */ -- cgit v1.2.3 From f7116284c734f3a47180cd9c907944a1837ccb3c Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 19:21:19 -0800 Subject: xen: add /dev/xen/evtchn driver This driver is used by application which wish to receive notifications from the hypervisor or other guests via Xen's event channel mechanism. In particular it is used by the xenstore daemon in domain 0. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- include/xen/evtchn.h | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 include/xen/evtchn.h (limited to 'include') diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h new file mode 100644 index 000000000000..14e833ee4e0b --- /dev/null +++ b/include/xen/evtchn.h @@ -0,0 +1,88 @@ +/****************************************************************************** + * evtchn.h + * + * Interface to /dev/xen/evtchn. + * + * Copyright (c) 2003-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __LINUX_PUBLIC_EVTCHN_H__ +#define __LINUX_PUBLIC_EVTCHN_H__ + +/* + * Bind a fresh port to VIRQ @virq. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_VIRQ \ + _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq)) +struct ioctl_evtchn_bind_virq { + unsigned int virq; +}; + +/* + * Bind a fresh port to remote <@remote_domain, @remote_port>. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ + _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain)) +struct ioctl_evtchn_bind_interdomain { + unsigned int remote_domain, remote_port; +}; + +/* + * Allocate a fresh port for binding to @remote_domain. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ + _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port)) +struct ioctl_evtchn_bind_unbound_port { + unsigned int remote_domain; +}; + +/* + * Unbind previously allocated @port. + */ +#define IOCTL_EVTCHN_UNBIND \ + _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind)) +struct ioctl_evtchn_unbind { + unsigned int port; +}; + +/* + * Unbind previously allocated @port. + */ +#define IOCTL_EVTCHN_NOTIFY \ + _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify)) +struct ioctl_evtchn_notify { + unsigned int port; +}; + +/* Clear and reinitialise the event buffer. Clear error condition. */ +#define IOCTL_EVTCHN_RESET \ + _IOC(_IOC_NONE, 'E', 5, 0) + +#endif /* __LINUX_PUBLIC_EVTCHN_H__ */ -- cgit v1.2.3 From c5cfef0f79cacc3aa438fc28f4747f0d10c54d0d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 19:21:19 -0800 Subject: xen: export ioctl headers to userspace Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- include/Kbuild | 1 + include/xen/Kbuild | 1 + 2 files changed, 2 insertions(+) create mode 100644 include/xen/Kbuild (limited to 'include') diff --git a/include/Kbuild b/include/Kbuild index d8c3e3cbf416..fe36accd4328 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -8,3 +8,4 @@ header-y += mtd/ header-y += rdma/ header-y += video/ header-y += drm/ +header-y += xen/ diff --git a/include/xen/Kbuild b/include/xen/Kbuild new file mode 100644 index 000000000000..4e65c16a445b --- /dev/null +++ b/include/xen/Kbuild @@ -0,0 +1 @@ +header-y += evtchn.h -- cgit v1.2.3 From a1ce1be578365a4da7e7d7db4812539d2d5da763 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 9 Feb 2009 12:05:50 -0800 Subject: xen: remove suspend_cancel hook Remove suspend_cancel hook from xenbus_driver, in preparation for using the device model for suspending. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- include/xen/xenbus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index f87f9614844d..0836772b9686 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -92,7 +92,6 @@ struct xenbus_driver { enum xenbus_state backend_state); int (*remove)(struct xenbus_device *dev); int (*suspend)(struct xenbus_device *dev); - int (*suspend_cancel)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; -- cgit v1.2.3 From de5b31bd47de7e6f41be2e271318dbc8f1af354d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 9 Feb 2009 12:05:50 -0800 Subject: xen: use device model for suspending xenbus devices Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- include/xen/xenbus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 0836772b9686..b9763badbd77 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -91,7 +91,7 @@ struct xenbus_driver { void (*otherend_changed)(struct xenbus_device *dev, enum xenbus_state backend_state); int (*remove)(struct xenbus_device *dev); - int (*suspend)(struct xenbus_device *dev); + int (*suspend)(struct xenbus_device *dev, pm_message_t state); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; -- cgit v1.2.3 From cff7e81b3dd7c25cd2248cd7a04c5764552d5d55 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 10 Mar 2009 14:39:59 -0700 Subject: xen: add /sys/hypervisor support Adds support for Xen info under /sys/hypervisor. Taken from Novell 2.6.27 backport tree. Signed-off-by: Jeremy Fitzhardinge --- include/xen/interface/version.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index 453235e923f0..e8b6519d47e9 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h @@ -57,4 +57,7 @@ struct xen_feature_info { /* Declares the features reported by XENVER_get_features. */ #include "features.h" +/* arg == NULL; returns host memory page size. */ +#define XENVER_pagesize 7 + #endif /* __XEN_PUBLIC_VERSION_H__ */ -- cgit v1.2.3 From 15dbf27cc18559a14e99609f78678aa86b9c6ff1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:32 +0100 Subject: perf_counter: software counter event infrastructure Provide generic software counter infrastructure that supports software events. This will be used to allow sample based profiling based on software events such as pagefaults. The current infrastructure can only provide a count of such events, no place information. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index dde564517b66..3fefc3b8150d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -126,6 +126,7 @@ struct hw_perf_counter { unsigned long counter_base; int nmi; unsigned int idx; + atomic64_t count; /* software */ atomic64_t prev_count; u64 irq_period; atomic64_t period_left; @@ -283,6 +284,8 @@ static inline int is_software_counter(struct perf_counter *counter) return !counter->hw_event.raw && counter->hw_event.type < 0; } +extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -295,10 +298,13 @@ static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void perf_counter_unthrottle(void) { } -static inline void hw_perf_restore(u64 ctrl) { } +static inline void hw_perf_restore(u64 ctrl) { } static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } + +static inline void perf_swcounter_event(enum hw_event_types event, u64 nr, + int nmi, struct pt_regs *regs) { } #endif #endif /* __KERNEL__ */ -- cgit v1.2.3 From ac17dc8e58f3069ea895cfff963adf98ff3cf6b2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:34 +0100 Subject: perf_counter: provide major/minor page fault software events Provide separate sw counters for major and minor page faults. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 3fefc3b8150d..4b14a8e9dbf5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -49,8 +49,10 @@ enum hw_event_types { PERF_COUNT_PAGE_FAULTS = -3, PERF_COUNT_CONTEXT_SWITCHES = -4, PERF_COUNT_CPU_MIGRATIONS = -5, + PERF_COUNT_PAGE_FAULTS_MIN = -6, + PERF_COUNT_PAGE_FAULTS_MAJ = -7, - PERF_SW_EVENTS_MIN = -6, + PERF_SW_EVENTS_MIN = -8, }; /* -- cgit v1.2.3 From d6d020e9957745c61285ef3da9f294c5e6801f0f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:35 +0100 Subject: perf_counter: hrtimer based sampling for software time events Use hrtimers to profile timer based sampling for the software time counters. This allows platforms without hardware counter support to still perform sample based profiling. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4b14a8e9dbf5..dfb4c7ce18b3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -114,6 +114,7 @@ struct perf_counter_hw_event { #include #include #include +#include #include struct task_struct; @@ -123,12 +124,19 @@ struct task_struct; */ struct hw_perf_counter { #ifdef CONFIG_PERF_COUNTERS - u64 config; - unsigned long config_base; - unsigned long counter_base; - int nmi; - unsigned int idx; - atomic64_t count; /* software */ + union { + struct { /* hardware */ + u64 config; + unsigned long config_base; + unsigned long counter_base; + int nmi; + unsigned int idx; + }; + union { /* software */ + atomic64_t count; + struct hrtimer hrtimer; + }; + }; atomic64_t prev_count; u64 irq_period; atomic64_t period_left; -- cgit v1.2.3 From 592903cdcbf606a838056bae6d03fc557806c914 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:36 +0100 Subject: perf_counter: add an event_list I noticed that the counter_list only includes top-level counters, thus perf_swcounter_event() will miss sw-counters in groups. Since perf_swcounter_event() also wants an RCU safe list, create a new event_list that includes all counters and uses RCU list ops and use call_rcu to free the counter structure. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index dfb4c7ce18b3..08c11a6afebc 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -187,6 +187,7 @@ struct file; struct perf_counter { #ifdef CONFIG_PERF_COUNTERS struct list_head list_entry; + struct list_head event_entry; struct list_head sibling_list; struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; @@ -220,6 +221,8 @@ struct perf_counter { struct perf_data *irqdata; struct perf_data *usrdata; struct perf_data data[2]; + + struct rcu_head rcu_head; #endif }; @@ -243,6 +246,7 @@ struct perf_counter_context { struct mutex mutex; struct list_head counter_list; + struct list_head event_list; int nr_counters; int nr_active; int is_active; -- cgit v1.2.3 From 01ef09d9ffb5ce9f8d62d1e5206da3d5ca612acc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:11 +0100 Subject: perf_counter: fix uninitialized usage of event_list Impact: fix boot crash When doing the generic context switch event I ran into some early boot hangs, which were caused by inf func recursion (event, fault, event, fault). I eventually tracked it down to event_list not being initialized at the time of the first event. Fix this. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.195392657@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 219748d00262..ca226a91abee 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -124,6 +124,8 @@ extern struct cred init_cred; # define INIT_PERF_COUNTERS(tsk) \ .perf_counter_ctx.counter_list = \ LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ + .perf_counter_ctx.event_list = \ + LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \ .perf_counter_ctx.lock = \ __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), #else -- cgit v1.2.3 From 4a0deca657f3dbb8a707b5dc8f173beec01e7ed2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:12 +0100 Subject: perf_counter: generic context switch event Impact: cleanup Use the generic software events for context switches. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.283522645@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75b2fc5306d8..7ed41f7c5ace 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -138,7 +138,6 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); -extern u64 cpu_nr_switches(int cpu); extern u64 cpu_nr_migrations(int cpu); extern unsigned long get_parent_ip(unsigned long addr); -- cgit v1.2.3 From e077df4f439681e43f0db8255b2d215b342ebdc6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:17 +0100 Subject: perf_counter: hook up the tracepoint events Impact: new perfcounters feature Enable usage of tracepoints as perf counter events. tracepoint event ids can be found in /debug/tracing/event/*/*/id and (for now) are represented as -65536+id in the type field. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.744044174@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 08c11a6afebc..065984c1ff57 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -53,6 +53,8 @@ enum hw_event_types { PERF_COUNT_PAGE_FAULTS_MAJ = -7, PERF_SW_EVENTS_MIN = -8, + + PERF_TP_EVENTS_MIN = -65536 }; /* @@ -222,6 +224,7 @@ struct perf_counter { struct perf_data *usrdata; struct perf_data data[2]; + void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; #endif }; -- cgit v1.2.3 From b8e83514b64577b48bfb794fe85fcde40a9343ca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:18 +0100 Subject: perf_counter: revamp syscall input ABI Impact: modify ABI The hardware/software classification in hw_event->type became a little strained due to the addition of tracepoint tracing. Instead split up the field and provide a type field to explicitly specify the counter type, while using the event_id field to specify which event to use. Raw counters still work as before, only the raw config now goes into raw_event. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.836807573@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 95 ++++++++++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 065984c1ff57..8f9394905502 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -21,56 +21,81 @@ */ /* - * Generalized performance counter event types, used by the hw_event.type - * parameter of the sys_perf_counter_open() syscall: + * hw_event.type */ -enum hw_event_types { +enum perf_event_types { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + /* - * Common hardware events, generalized by the kernel: + * available TYPE space, raw is the max value. */ - PERF_COUNT_CPU_CYCLES = 0, - PERF_COUNT_INSTRUCTIONS = 1, - PERF_COUNT_CACHE_REFERENCES = 2, - PERF_COUNT_CACHE_MISSES = 3, - PERF_COUNT_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_BRANCH_MISSES = 5, - PERF_COUNT_BUS_CYCLES = 6, - PERF_HW_EVENTS_MAX = 7, + PERF_TYPE_RAW = 128, +}; +/* + * Generalized performance counter event types, used by the hw_event.event_id + * parameter of the sys_perf_counter_open() syscall: + */ +enum hw_event_ids { /* - * Special "software" counters provided by the kernel, even if - * the hardware does not support performance counters. These - * counters measure various physical and sw events of the - * kernel (and allow the profiling of them as well): + * Common hardware events, generalized by the kernel: */ - PERF_COUNT_CPU_CLOCK = -1, - PERF_COUNT_TASK_CLOCK = -2, - PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, - PERF_COUNT_CPU_MIGRATIONS = -5, - PERF_COUNT_PAGE_FAULTS_MIN = -6, - PERF_COUNT_PAGE_FAULTS_MAJ = -7, - - PERF_SW_EVENTS_MIN = -8, + PERF_COUNT_CPU_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + PERF_COUNT_BUS_CYCLES = 6, + + PERF_HW_EVENTS_MAX = 7, +}; - PERF_TP_EVENTS_MIN = -65536 +/* + * Special "software" counters provided by the kernel, even if the hardware + * does not support performance counters. These counters measure various + * physical and sw events of the kernel (and allow the profiling of them as + * well): + */ +enum sw_event_ids { + PERF_COUNT_CPU_CLOCK = 0, + PERF_COUNT_TASK_CLOCK = 1, + PERF_COUNT_PAGE_FAULTS = 2, + PERF_COUNT_CONTEXT_SWITCHES = 3, + PERF_COUNT_CPU_MIGRATIONS = 4, + PERF_COUNT_PAGE_FAULTS_MIN = 5, + PERF_COUNT_PAGE_FAULTS_MAJ = 6, + + PERF_SW_EVENTS_MAX = 7, }; /* * IRQ-notification data record type: */ enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, }; /* * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - __s64 type; + union { + struct { + __u64 event_id : 56, + type : 8; + }; + struct { + __u64 raw_event_id : 63, + raw_type : 1; + }; + __u64 event_config; + }; __u64 irq_period; __u64 record_type; @@ -78,7 +103,6 @@ struct perf_counter_hw_event { __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ pinned : 1, /* must always be on PMU */ exclusive : 1, /* only group on PMU */ @@ -87,7 +111,7 @@ struct perf_counter_hw_event { exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - __reserved_1 : 54; + __reserved_1 : 55; __u32 extra_config_len; __u32 __reserved_4; @@ -298,10 +322,11 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, */ static inline int is_software_counter(struct perf_counter *counter) { - return !counter->hw_event.raw && counter->hw_event.type < 0; + return !counter->hw_event.raw_type && + counter->hw_event.type != PERF_TYPE_HARDWARE; } -extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *); +extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); #else static inline void @@ -320,7 +345,7 @@ static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } -static inline void perf_swcounter_event(enum hw_event_types event, u64 nr, +static inline void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } #endif -- cgit v1.2.3 From 0322cd6ec504b0bf08ca7b2c3d7f43bda37d79c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:19 +0100 Subject: perf_counter: unify irq output code Impact: cleanup Having 3 slightly different copies of the same code around does nobody any good. First step in revamping the output format. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.929962222@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8f9394905502..a4b76c0175f3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -317,6 +317,8 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu); +extern void perf_counter_output(struct perf_counter *counter, + int nmi, struct pt_regs *regs); /* * Return 1 for a software counter, 0 for a hardware counter */ -- cgit v1.2.3 From 9aaa131a279834dff75c290c91f0058f62d72d46 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 21 Mar 2009 15:31:47 +1100 Subject: perf_counter: fix type/event_id layout on big-endian systems Impact: build fix for powerpc Commit db3a944aca35ae61 ("perf_counter: revamp syscall input ABI") expanded the hw_event.type field into a union of structs containing bitfields. In particular it introduced a type field and a raw_type field, with the intention that the 1-bit raw_type field should overlay the most-significant bit of the 8-bit type field, and in fact perf_counter_alloc() now assumes that (or at least, assumes that raw_type doesn't overlay any of the bits that are 1 in the values of PERF_TYPE_{HARDWARE,SOFTWARE,TRACEPOINT}). Unfortunately this is not true on big-endian systems such as PowerPC, where bitfields are laid out from left to right, i.e. from most significant bit to least significant. This means that setting hw_event.type = PERF_TYPE_SOFTWARE will set hw_event.raw_type to 1. This fixes it by making the layout depend on whether or not __BIG_ENDIAN_BITFIELD is defined. It's a bit ugly, but that's what we get for using bitfields in a user/kernel ABI. Also, that commit didn't fix up some places in arch/powerpc/kernel/ perf_counter.c where hw_event.raw and hw_event.event_id were used. This fixes them too. Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a4b76c0175f3..98f5990be1e1 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -15,6 +15,7 @@ #include #include +#include /* * User-space ABI bits: @@ -86,6 +87,7 @@ enum perf_counter_record_type { */ struct perf_counter_hw_event { union { +#ifndef __BIG_ENDIAN_BITFIELD struct { __u64 event_id : 56, type : 8; @@ -94,6 +96,16 @@ struct perf_counter_hw_event { __u64 raw_event_id : 63, raw_type : 1; }; +#else + struct { + __u64 type : 8, + event_id : 56; + }; + struct { + __u64 raw_type : 1, + raw_event_id : 63; + }; +#endif /* __BIT_ENDIAN_BITFIELD */ __u64 event_config; }; -- cgit v1.2.3 From f4a2deb4860497f4332cf6a1acddab3dd628ddf0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:06 +0100 Subject: perf_counter: remove the event config bitfields Since the bitfields turned into a bit of a mess, remove them and rely on good old masks. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.059499915@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 74 ++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 98f5990be1e1..56099e52970d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -82,32 +82,37 @@ enum perf_counter_record_type { PERF_RECORD_GROUP = 2, }; +#define __PERF_COUNTER_MASK(name) \ + (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ + PERF_COUNTER_##name##_SHIFT) + +#define PERF_COUNTER_RAW_BITS 1 +#define PERF_COUNTER_RAW_SHIFT 63 +#define PERF_COUNTER_RAW_MASK __PERF_COUNTER_MASK(RAW) + +#define PERF_COUNTER_CONFIG_BITS 63 +#define PERF_COUNTER_CONFIG_SHIFT 0 +#define PERF_COUNTER_CONFIG_MASK __PERF_COUNTER_MASK(CONFIG) + +#define PERF_COUNTER_TYPE_BITS 7 +#define PERF_COUNTER_TYPE_SHIFT 56 +#define PERF_COUNTER_TYPE_MASK __PERF_COUNTER_MASK(TYPE) + +#define PERF_COUNTER_EVENT_BITS 56 +#define PERF_COUNTER_EVENT_SHIFT 0 +#define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) + /* * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - union { -#ifndef __BIG_ENDIAN_BITFIELD - struct { - __u64 event_id : 56, - type : 8; - }; - struct { - __u64 raw_event_id : 63, - raw_type : 1; - }; -#else - struct { - __u64 type : 8, - event_id : 56; - }; - struct { - __u64 raw_type : 1, - raw_event_id : 63; - }; -#endif /* __BIT_ENDIAN_BITFIELD */ - __u64 event_config; - }; + /* + * The MSB of the config word signifies if the rest contains cpu + * specific (raw) counter configuration data, if unset, the next + * 7 bits are an event type and the rest of the bits are the event + * identifier. + */ + __u64 config; __u64 irq_period; __u64 record_type; @@ -157,6 +162,27 @@ struct perf_counter_hw_event { struct task_struct; +static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event) +{ + return hw_event->config & PERF_COUNTER_RAW_MASK; +} + +static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event) +{ + return hw_event->config & PERF_COUNTER_CONFIG_MASK; +} + +static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event) +{ + return (hw_event->config & PERF_COUNTER_TYPE_MASK) >> + PERF_COUNTER_TYPE_SHIFT; +} + +static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event) +{ + return hw_event->config & PERF_COUNTER_EVENT_MASK; +} + /** * struct hw_perf_counter - performance counter hardware details: */ @@ -336,8 +362,8 @@ extern void perf_counter_output(struct perf_counter *counter, */ static inline int is_software_counter(struct perf_counter *counter) { - return !counter->hw_event.raw_type && - counter->hw_event.type != PERF_TYPE_HARDWARE; + return !perf_event_raw(&counter->hw_event) && + perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; } extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); -- cgit v1.2.3 From 96f6d4444302bb2ea2cf409529eef816462f6ce0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:07 +0100 Subject: perf_counter: avoid recursion Tracepoint events like lock_acquire and software counters like pagefaults can recurse into the perf counter code again, avoid that. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.152096433@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 56099e52970d..18dc17d0a61c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -328,6 +328,13 @@ struct perf_cpu_context { int active_oncpu; int max_pertask; int exclusive; + + /* + * Recursion avoidance: + * + * task, softirq, irq, nmi context + */ + int recursion[4]; }; /* -- cgit v1.2.3 From 37d81828385f8ff823caaaf1a83e72d065b6cfa1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 23 Mar 2009 18:22:08 +0100 Subject: perf_counter: add an mmap method to allow userspace to read hardware counters Impact: new feature giving performance improvement This adds the ability for userspace to do an mmap on a hardware counter fd and get access to a read-only page that contains the information needed to translate a hardware counter value to the full 64-bit counter value that would be returned by a read on the fd. This is useful on architectures that allow user programs to read the hardware counters, such as PowerPC. The mmap will only succeed if the counter is a hardware counter monitoring the current process. On my quad 2.5GHz PowerPC 970MP machine, userspace can read a counter and translate it to the full 64-bit value in about 30ns using the mmapped page, compared to about 830ns for the read syscall on the counter, so this does give a significant performance improvement. Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra Orig-LKML-Reference: <20090323172417.297057964@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 18dc17d0a61c..40b324e91bf6 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -143,6 +143,17 @@ struct perf_counter_hw_event { #define PERF_COUNTER_IOC_ENABLE _IO('$', 0) #define PERF_COUNTER_IOC_DISABLE _IO('$', 1) +/* + * Structure of the page that can be mapped via mmap + */ +struct perf_counter_mmap_page { + __u32 version; /* version number of this structure */ + __u32 compat_version; /* lowest version this is compat with */ + __u32 lock; /* seqlock for synchronization */ + __u32 index; /* hardware counter identifier */ + __s64 offset; /* add to hardware counter value */ +}; + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -278,6 +289,9 @@ struct perf_counter { int oncpu; int cpu; + /* pointer to page shared with userspace via mmap */ + unsigned long user_page; + /* read() / irq related data */ wait_queue_head_t waitq; /* optional: for NMIs */ @@ -361,6 +375,7 @@ extern int perf_counter_task_enable(void); extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu); +extern void perf_counter_update_userpage(struct perf_counter *counter); extern void perf_counter_output(struct perf_counter *counter, int nmi, struct pt_regs *regs); -- cgit v1.2.3 From 9ab772cd535c4b256a577eae516f9c7462346b2d Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 23 Mar 2009 18:22:09 +0100 Subject: mutex: add atomic_dec_and_mutex_lock() Much like the atomic_dec_and_lock() function in which we take an hold a spin_lock if we drop the atomic to 0 this function takes and holds the mutex if we dec the atomic to 0. Signed-off-by: Eric Paris Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.410913479@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3069ec7e0ab8..93054fc3635c 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -151,4 +151,27 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} + #endif -- cgit v1.2.3 From 7b732a75047738e4f85438ed2f9cd34bf5f2a19a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:10 +0100 Subject: perf_counter: new output ABI - part 1 Impact: Rework the perfcounter output ABI use sys_read() only for instant data and provide mmap() output for all async overflow data. The first mmap() determines the size of the output buffer. The mmap() size must be a PAGE_SIZE multiple of 1+pages, where pages must be a power of 2 or 0. Further mmap()s of the same fd must have the same size. Once all maps are gone, you can again mmap() with a new size. In case of 0 extra pages there is no data output and the first page only contains meta data. When there are data pages, a poll() event will be generated for each full page of data. Furthermore, the output is circular. This means that although 1 page is a valid configuration, its useless, since we'll start overwriting it the instant we report a full page. Future work will focus on the output format (currently maintained) where we'll likey want each entry denoted by a header which includes a type and length. Further future work will allow to splice() the fd, also containing the async overflow data -- splice() would be mutually exclusive with mmap() of the data. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.470536358@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 40b324e91bf6..2b5e66d5ebdf 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -152,6 +152,8 @@ struct perf_counter_mmap_page { __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + + __u32 data_head; /* head in the data section */ }; #ifdef __KERNEL__ @@ -218,21 +220,6 @@ struct hw_perf_counter { #endif }; -/* - * Hardcoded buffer length limit for now, for IRQ-fed events: - */ -#define PERF_DATA_BUFLEN 2048 - -/** - * struct perf_data - performance counter IRQ data sampling ... - */ -struct perf_data { - int len; - int rd_idx; - int overrun; - u8 data[PERF_DATA_BUFLEN]; -}; - struct perf_counter; /** @@ -256,6 +243,14 @@ enum perf_counter_active_state { struct file; +struct perf_mmap_data { + struct rcu_head rcu_head; + int nr_pages; + atomic_t head; + struct perf_counter_mmap_page *user_page; + void *data_pages[0]; +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -289,16 +284,15 @@ struct perf_counter { int oncpu; int cpu; - /* pointer to page shared with userspace via mmap */ - unsigned long user_page; + /* mmap bits */ + struct mutex mmap_mutex; + atomic_t mmap_count; + struct perf_mmap_data *data; - /* read() / irq related data */ + /* poll related */ wait_queue_head_t waitq; /* optional: for NMIs */ int wakeup_pending; - struct perf_data *irqdata; - struct perf_data *usrdata; - struct perf_data data[2]; void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; -- cgit v1.2.3 From c7138f37f905bb7987b1f9f5a8ee73667db39f25 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 Mar 2009 13:18:16 +0100 Subject: perf_counter: fix perf_poll() Impact: fix kerneltop 100% CPU usage Only return a poll event when there's actually been one, poll_wait() doesn't actually wait for the waitq you pass it, it only enqueues you on it. Only once all FDs have been iterated and none of thm returned a poll-event will it schedule(). Also make it return POLL_HUP when there's not mmap() area to read from. Further, fix a silly bug in the write code. Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Arjan van de Ven Orig-LKML-Reference: <1237897096.24918.181.camel@twins> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2b5e66d5ebdf..48212c15b7d6 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -246,6 +246,7 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; int nr_pages; + atomic_t wakeup; atomic_t head; struct perf_counter_mmap_page *user_page; void *data_pages[0]; -- cgit v1.2.3 From 5c1481943250ab65fa5130e05ec479c93216e9f7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 12:30:23 +0100 Subject: perf_counter: output objects Provide a {type,size} header for each output entry. This should provide extensible output, and the ability to mix multiple streams. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arjan van de Ven Cc: Wu Fengguang Orig-LKML-Reference: <20090325113316.831607932@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 48212c15b7d6..c256635377d4 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -156,6 +156,16 @@ struct perf_counter_mmap_page { __u32 data_head; /* head in the data section */ }; +struct perf_event_header { + __u32 type; + __u32 size; +}; + +enum perf_event_type { + PERF_EVENT_IP = 0, + PERF_EVENT_GROUP = 1, +}; + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -260,6 +270,7 @@ struct perf_counter { struct list_head list_entry; struct list_head event_entry; struct list_head sibling_list; + int nr_siblings; struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; -- cgit v1.2.3 From ea5d20cf99db5d26d43b6d322d3ace17e08a6552 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 12:30:25 +0100 Subject: perf_counter: optionally provide the pid/tid of the sampled task Allow cpu wide counters to profile userspace by providing what process the sample belongs to. This raises the first issue with the output type, lots of these options: group, tid, callchain, etc.. are non-exclusive and could be combined, suggesting a bitfield. However, things like the mmap() data stream doesn't fit in that. How to split the type field... Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arjan van de Ven Cc: Wu Fengguang Orig-LKML-Reference: <20090325113317.013775235@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c256635377d4..7fdbdf8be775 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -127,8 +127,9 @@ struct perf_counter_hw_event { exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ + include_tid : 1, /* include the tid */ - __reserved_1 : 55; + __reserved_1 : 54; __u32 extra_config_len; __u32 __reserved_4; @@ -164,6 +165,8 @@ struct perf_event_header { enum perf_event_type { PERF_EVENT_IP = 0, PERF_EVENT_GROUP = 1, + + __PERF_EVENT_TID = 0x100, }; #ifdef __KERNEL__ -- cgit v1.2.3 From 53cfbf593758916aac41db728f029986a62f1254 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 25 Mar 2009 22:46:58 +1100 Subject: perf_counter: record time running and time enabled for each counter Impact: new functionality Currently, if there are more counters enabled than can fit on the CPU, the kernel will multiplex the counters on to the hardware using round-robin scheduling. That isn't too bad for sampling counters, but for counting counters it means that the value read from a counter represents some unknown fraction of the true count of events that occurred while the counter was enabled. This remedies the situation by keeping track of how long each counter is enabled for, and how long it is actually on the cpu and counting events. These times are recorded in nanoseconds using the task clock for per-task counters and the cpu clock for per-cpu counters. These values can be supplied to userspace on a read from the counter. Userspace requests that they be supplied after the counter value by setting the PERF_FORMAT_TOTAL_TIME_ENABLED and/or PERF_FORMAT_TOTAL_TIME_RUNNING bits in the hw_event.read_format field when creating the counter. (There is no way to change the read format after the counter is created, though it would be possible to add some way to do that.) Using this information it is possible for userspace to scale the count it reads from the counter to get an estimate of the true count: true_count_estimate = count * total_time_enabled / total_time_running This also lets userspace detect the situation where the counter never got to go on the cpu: total_time_running == 0. This functionality has been requested by the PAPI developers, and will be generally needed for interpreting the count values from counting counters correctly. In the implementation, this keeps 5 time values (in nanoseconds) for each counter: total_time_enabled and total_time_running are used when the counter is in state OFF or ERROR and for reporting back to userspace. When the counter is in state INACTIVE or ACTIVE, it is the tstamp_enabled, tstamp_running and tstamp_stopped values that are relevant, and total_time_enabled and total_time_running are determined from them. (tstamp_stopped is only used in INACTIVE state.) The reason for doing it like this is that it means that only counters being enabled or disabled at sched-in and sched-out time need to be updated. There are no new loops that iterate over all counters to update total_time_enabled or total_time_running. This also keeps separate child_total_time_running and child_total_time_enabled fields that get added in when reporting the totals to userspace. They are separate fields so that they can be atomic. We don't want to use atomics for total_time_running, total_time_enabled etc., because then we would have to use atomic sequences to update them, which are slower than regular arithmetic and memory accesses. It is possible to measure total_time_running by adding a task_clock counter to each group of counters, and total_time_enabled can be measured approximately with a top-level task_clock counter (though inaccuracies will creep in if you need to disable and enable groups since it is not possible in general to disable/enable the top-level task_clock counter simultaneously with another group). However, that adds extra overhead - I measured around 15% increase in the context switch latency reported by lat_ctx (from lmbench) when a task_clock counter was added to each of 2 groups, and around 25% increase when a task_clock counter was added to each of 4 groups. (In both cases a top-level task-clock counter was also added.) In contrast, the code added in this commit gives better information with no overhead that I could measure (in fact in some cases I measured lower times with this code, but the differences were all less than one standard deviation). [ v2: address review comments by Andrew Morton. ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Andrew Morton Orig-LKML-Reference: <18890.6578.728637.139402@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 53 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7fdbdf8be775..6bf67ce17625 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -102,6 +102,16 @@ enum perf_counter_record_type { #define PERF_COUNTER_EVENT_SHIFT 0 #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) +/* + * Bits that can be set in hw_event.read_format to request that + * reads on the counter should return the indicated quantities, + * in increasing order of bit value, after the counter value. + */ +enum perf_counter_read_format { + PERF_FORMAT_TOTAL_TIME_ENABLED = 1, + PERF_FORMAT_TOTAL_TIME_RUNNING = 2, +}; + /* * Hardware event to monitor via a performance monitoring counter: */ @@ -281,6 +291,32 @@ struct perf_counter { enum perf_counter_active_state prev_state; atomic64_t count; + /* + * These are the total time in nanoseconds that the counter + * has been enabled (i.e. eligible to run, and the task has + * been scheduled in, if this is a per-task counter) + * and running (scheduled onto the CPU), respectively. + * + * They are computed from tstamp_enabled, tstamp_running and + * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. + */ + u64 total_time_enabled; + u64 total_time_running; + + /* + * These are timestamps used for computing total_time_enabled + * and total_time_running when the counter is in INACTIVE or + * ACTIVE state, measured in nanoseconds from an arbitrary point + * in time. + * tstamp_enabled: the notional time when the counter was enabled + * tstamp_running: the notional time when the counter was scheduled on + * tstamp_stopped: in INACTIVE state, the notional time when the + * counter was scheduled off. + */ + u64 tstamp_enabled; + u64 tstamp_running; + u64 tstamp_stopped; + struct perf_counter_hw_event hw_event; struct hw_perf_counter hw; @@ -291,6 +327,13 @@ struct perf_counter { struct perf_counter *parent; struct list_head child_list; + /* + * These accumulate total time (in nanoseconds) that children + * counters have been enabled and running, respectively. + */ + atomic64_t child_total_time_enabled; + atomic64_t child_total_time_running; + /* * Protect attach/detach and child_list: */ @@ -339,6 +382,16 @@ struct perf_counter_context { int nr_active; int is_active; struct task_struct *task; + + /* + * time_now is the current time in nanoseconds since an arbitrary + * point in the past. For per-task counters, this is based on the + * task clock, and for per-cpu counters it is based on the cpu clock. + * time_lost is an offset from the task/cpu clock, used to make it + * appear that time only passes while the context is scheduled in. + */ + u64 time_now; + u64 time_lost; #endif }; -- cgit v1.2.3 From 925d519ab82b6dd7aca9420d809ee83819c08db2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:02 +0200 Subject: perf_counter: unify and fix delayed counter wakeup While going over the wakeup code I noticed delayed wakeups only work for hardware counters but basically all software counters rely on them. This patch unifies and generalizes the delayed wakeup to fix this issue. Since we're dealing with NMI context bits here, use a cmpxchg() based single link list implementation to track counters that have pending wakeups. [ This should really be generic code for delayed wakeups, but since we cannot use cmpxchg()/xchg() in generic code, I've let it live in the perf_counter code. -- Eric Dumazet could use it to aggregate the network wakeups. ] Furthermore, the x86 method of using TIF flags was flawed in that its quite possible to end up setting the bit on the idle task, loosing the wakeup. The powerpc method uses per-cpu storage and does appear to be sufficient. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.153932974@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6bf67ce17625..0d833228eee5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -275,6 +275,10 @@ struct perf_mmap_data { void *data_pages[0]; }; +struct perf_wakeup_entry { + struct perf_wakeup_entry *next; +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -350,7 +354,7 @@ struct perf_counter { /* poll related */ wait_queue_head_t waitq; /* optional: for NMIs */ - int wakeup_pending; + struct perf_wakeup_entry wakeup; void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; @@ -427,7 +431,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); -extern void perf_counter_notify(struct pt_regs *regs); +extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void perf_counter_unthrottle(void); extern u64 hw_perf_save_disable(void); @@ -461,7 +465,7 @@ static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } -static inline void perf_counter_notify(struct pt_regs *regs) { } +static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } static inline void perf_counter_unthrottle(void) { } static inline void hw_perf_restore(u64 ctrl) { } @@ -469,8 +473,9 @@ static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } -static inline void perf_swcounter_event(u32 event, u64 nr, - int nmi, struct pt_regs *regs) { } +static inline void +perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } + #endif #endif /* __KERNEL__ */ -- cgit v1.2.3 From 38ff667b321b00f5e6830e93fb4ab11a653a2920 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:03 +0200 Subject: perf_counter: fix update_userpage() It just occured to me it is possible to have multiple contending updates of the userpage (mmap information vs overflow vs counter). This would break the seqlock logic. It appear the arch code uses this from NMI context, so we cannot possibly serialize its use, therefore separate the data_head update from it and let it return to its original use. The arch code needs to make sure there are no contending callers by disabling the counter before using it -- powerpc appears to do this nicely. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.241410660@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0d833228eee5..8ac18852dcfe 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -160,10 +160,45 @@ struct perf_counter_hw_event { struct perf_counter_mmap_page { __u32 version; /* version number of this structure */ __u32 compat_version; /* lowest version this is compat with */ + + /* + * Bits needed to read the hw counters in user-space. + * + * The index and offset should be read atomically using the seqlock: + * + * __u32 seq, index; + * __s64 offset; + * + * again: + * rmb(); + * seq = pc->lock; + * + * if (unlikely(seq & 1)) { + * cpu_relax(); + * goto again; + * } + * + * index = pc->index; + * offset = pc->offset; + * + * rmb(); + * if (pc->lock != seq) + * goto again; + * + * After this, index contains architecture specific counter index + 1, + * so that 0 means unavailable, offset contains the value to be added + * to the result of the raw timer read to obtain this counter's value. + */ __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + /* + * Control data for the mmap() data buffer. + * + * User-space reading this value should issue an rmb(), on SMP capable + * platforms, after reading this value -- see perf_counter_wakeup(). + */ __u32 data_head; /* head in the data section */ }; -- cgit v1.2.3 From 0a4a93919bdc5cee48fe4367591e8e0449c1086c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:05 +0200 Subject: perf_counter: executable mmap() information Currently the profiling information returns userspace IPs but no way to correlate them to userspace code. Userspace could look into /proc/$pid/maps but that might not be current or even present anymore at the time of analyzing the IPs. Therefore provide means to track the mmap information and provide it in the output stream. XXX: only covers mmap()/munmap(), mremap() and mprotect() are missing. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Cc: Andrew Morton Orig-LKML-Reference: <20090330171023.417259499@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8ac18852dcfe..037a81145aca 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -137,9 +137,11 @@ struct perf_counter_hw_event { exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - include_tid : 1, /* include the tid */ + include_tid : 1, /* include the tid */ + mmap : 1, /* include mmap data */ + munmap : 1, /* include munmap data */ - __reserved_1 : 54; + __reserved_1 : 52; __u32 extra_config_len; __u32 __reserved_4; @@ -211,6 +213,9 @@ enum perf_event_type { PERF_EVENT_IP = 0, PERF_EVENT_GROUP = 1, + PERF_EVENT_MMAP = 2, + PERF_EVENT_MUNMAP = 3, + __PERF_EVENT_TID = 0x100, }; @@ -491,6 +496,12 @@ static inline int is_software_counter(struct perf_counter *counter) extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); +extern void perf_counter_mmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file); + +extern void perf_counter_munmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -511,6 +522,15 @@ static inline int perf_counter_task_enable(void) { return -EINVAL; } static inline void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } + +static inline void +perf_counter_mmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file) { } + +static inline void +perf_counter_munmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file) { } + #endif #endif /* __KERNEL__ */ -- cgit v1.2.3 From 5ed00415e304203a0a9dcaef226d6d3f1106070e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:12 +0200 Subject: perf_counter: re-arrange the perf_event_type Breaks ABI yet again :-) Change the event type so that [0, 2^31-1] are regular event types, but [2^31, 2^32-1] forms a bitmask for overflow events. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171024.047961770@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 037a81145aca..edf5bfb7ff51 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -210,13 +210,15 @@ struct perf_event_header { }; enum perf_event_type { - PERF_EVENT_IP = 0, + PERF_EVENT_GROUP = 1, PERF_EVENT_MMAP = 2, PERF_EVENT_MUNMAP = 3, - __PERF_EVENT_TID = 0x100, + PERF_EVENT_OVERFLOW = 1UL << 31, + __PERF_EVENT_IP = 1UL << 30, + __PERF_EVENT_TID = 1UL << 29, }; #ifdef __KERNEL__ -- cgit v1.2.3 From 394ee07623cf556c8daae2b3c00cf5fea47f0811 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:14 +0200 Subject: perf_counter: provide generic callchain bits Provide the generic callchain support bits. If hw_event->callchain is set the arch specific perf_callchain() function is called upon to provide a perf_callchain_entry structure filled with the current callchain. If it does so, it is added to the overflow output event. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171024.254266860@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index edf5bfb7ff51..43083afffe0f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -140,8 +140,9 @@ struct perf_counter_hw_event { include_tid : 1, /* include the tid */ mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ + callchain : 1, /* add callchain data */ - __reserved_1 : 52; + __reserved_1 : 51; __u32 extra_config_len; __u32 __reserved_4; @@ -219,6 +220,7 @@ enum perf_event_type { PERF_EVENT_OVERFLOW = 1UL << 31, __PERF_EVENT_IP = 1UL << 30, __PERF_EVENT_TID = 1UL << 29, + __PERF_EVENT_CALLCHAIN = 1UL << 28, }; #ifdef __KERNEL__ @@ -504,6 +506,15 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); +#define MAX_STACK_DEPTH 255 + +struct perf_callchain_entry { + u64 nr; + u64 ip[MAX_STACK_DEPTH]; +}; + +extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } -- cgit v1.2.3 From 8a057d84912f36e53f970c4d177cb4bb6b2f9e08 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:11:59 +0200 Subject: perf_counter: move the event overflow output bits to record_type Per suggestion from Paul, move the event overflow bits to record_type and sanitize the enums a bit. Breaks the ABI -- again ;-) Suggested-by: Paul Mackerras Signed-off-by: Peter Zijlstra Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.151921176@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 50 +++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 43083afffe0f..06a6fba9f531 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -73,15 +73,6 @@ enum sw_event_ids { PERF_SW_EVENTS_MAX = 7, }; -/* - * IRQ-notification data record type: - */ -enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, -}; - #define __PERF_COUNTER_MASK(name) \ (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ PERF_COUNTER_##name##_SHIFT) @@ -102,6 +93,17 @@ enum perf_counter_record_type { #define PERF_COUNTER_EVENT_SHIFT 0 #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) +/* + * Bits that can be set in hw_event.record_type to request information + * in the overflow packets. + */ +enum perf_counter_record_format { + PERF_RECORD_IP = 1U << 0, + PERF_RECORD_TID = 1U << 1, + PERF_RECORD_GROUP = 1U << 2, + PERF_RECORD_CALLCHAIN = 1U << 3, +}; + /* * Bits that can be set in hw_event.read_format to request that * reads on the counter should return the indicated quantities, @@ -125,8 +127,8 @@ struct perf_counter_hw_event { __u64 config; __u64 irq_period; - __u64 record_type; - __u64 read_format; + __u32 record_type; + __u32 read_format; __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ @@ -137,12 +139,10 @@ struct perf_counter_hw_event { exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - include_tid : 1, /* include the tid */ mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ - callchain : 1, /* add callchain data */ - __reserved_1 : 51; + __reserved_1 : 53; __u32 extra_config_len; __u32 __reserved_4; @@ -212,15 +212,21 @@ struct perf_event_header { enum perf_event_type { - PERF_EVENT_GROUP = 1, - - PERF_EVENT_MMAP = 2, - PERF_EVENT_MUNMAP = 3, + PERF_EVENT_MMAP = 1, + PERF_EVENT_MUNMAP = 2, - PERF_EVENT_OVERFLOW = 1UL << 31, - __PERF_EVENT_IP = 1UL << 30, - __PERF_EVENT_TID = 1UL << 29, - __PERF_EVENT_CALLCHAIN = 1UL << 28, + /* + * Half the event type space is reserved for the counter overflow + * bitfields, as found in hw_event.record_type. + * + * These events will have types of the form: + * PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } * + */ + PERF_EVENT_COUNTER_OVERFLOW = 1UL << 31, + __PERF_EVENT_IP = PERF_RECORD_IP, + __PERF_EVENT_TID = PERF_RECORD_TID, + __PERF_EVENT_GROUP = PERF_RECORD_GROUP, + __PERF_EVENT_CALLCHAIN = PERF_RECORD_CALLCHAIN, }; #ifdef __KERNEL__ -- cgit v1.2.3 From c457810ab4a825161aec6ef71b581e1bc8febd1a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:12:01 +0200 Subject: perf_counter: per event wakeups By request, provide a way to request a wakeup every 'n' events instead of every page of output. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.323309784@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 06a6fba9f531..5428ba120d78 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -145,7 +145,7 @@ struct perf_counter_hw_event { __reserved_1 : 53; __u32 extra_config_len; - __u32 __reserved_4; + __u32 wakeup_events; /* wakeup every n events */ __u64 __reserved_2; __u64 __reserved_3; @@ -321,6 +321,7 @@ struct perf_mmap_data { int nr_pages; atomic_t wakeup; atomic_t head; + atomic_t events; struct perf_counter_mmap_page *user_page; void *data_pages[0]; }; -- cgit v1.2.3 From 5872bdb88a35fae7d224bd6b21e5f377e854ccfc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:12:03 +0200 Subject: perf_counter: add more context information Put in counts to tell which ips belong to what context. ----- | | hv | -- nr | | kernel | -- | | user ----- Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.493101305@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5428ba120d78..90cce0c74a03 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -513,10 +513,10 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); -#define MAX_STACK_DEPTH 255 +#define MAX_STACK_DEPTH 254 struct perf_callchain_entry { - u64 nr; + u32 nr, hv, kernel, user; u64 ip[MAX_STACK_DEPTH]; }; -- cgit v1.2.3 From 92f22a3865abe87eea2609a6f8e5be5123f7ce4f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:12:04 +0200 Subject: perf_counter: update mmap() counter read Paul noted that we don't need SMP barriers for the mmap() counter read because its always on the same cpu (otherwise you can't access the hw counter anyway). So remove the SMP barriers and replace them with regular compiler barriers. Further, update the comment to include a race free method of reading said hardware counter. The primary change is putting the pmc_read inside the seq-loop, otherwise we can still race and read rubbish. Noticed-by: Paul Mackerras Signed-off-by: Peter Zijlstra Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.577951445@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 90cce0c74a03..f2b914de3f0c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -167,30 +167,28 @@ struct perf_counter_mmap_page { /* * Bits needed to read the hw counters in user-space. * - * The index and offset should be read atomically using the seqlock: - * - * __u32 seq, index; - * __s64 offset; + * u32 seq; + * s64 count; * * again: - * rmb(); * seq = pc->lock; - * * if (unlikely(seq & 1)) { * cpu_relax(); * goto again; * } * - * index = pc->index; - * offset = pc->offset; + * if (pc->index) { + * count = pmc_read(pc->index - 1); + * count += pc->offset; + * } else + * goto regular_read; * - * rmb(); + * barrier(); * if (pc->lock != seq) * goto again; * - * After this, index contains architecture specific counter index + 1, - * so that 0 means unavailable, offset contains the value to be added - * to the result of the raw timer read to obtain this counter's value. + * NOTE: for obvious reason this only works on self-monitoring + * processes. */ __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ -- cgit v1.2.3 From 52400ba946759af28442dee6265c5c0180ac7122 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:40:49 -0700 Subject: futex: add requeue_pi functionality PI Futexes and their underlying rt_mutex cannot be left ownerless if there are pending waiters as this will break the PI boosting logic, so the standard requeue commands aren't sufficient. The new commands properly manage pi futex ownership by ensuring a futex with waiters has an owner at all times. This will allow glibc to properly handle pi mutexes with pthread_condvars. The approach taken here is to create two new futex op codes: FUTEX_WAIT_REQUEUE_PI: Tasks will use this op code to wait on a futex (such as a non-pi waitqueue) and wake after they have been requeued to a pi futex. Prior to returning to userspace, they will acquire this pi futex (and the underlying rt_mutex). futex_wait_requeue_pi() is the result of a high speed collision between futex_wait() and futex_lock_pi() (with the first part of futex_lock_pi() being done by futex_proxy_trylock_atomic() on behalf of the top_waiter). FUTEX_REQUEUE_PI (and FUTEX_CMP_REQUEUE_PI): This call must be used to wake tasks waiting with FUTEX_WAIT_REQUEUE_PI, regardless of how many tasks the caller intends to wake or requeue. pthread_cond_broadcast() should call this with nr_wake=1 and nr_requeue=INT_MAX. pthread_cond_signal() should call this with nr_wake=1 and nr_requeue=0. The reason being we need both callers to get the benefit of the futex_proxy_trylock_atomic() routine. futex_requeue() also enqueues the top_waiter on the rt_mutex via rt_mutex_start_proxy_lock(). Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner --- include/linux/futex.h | 8 ++++++++ include/linux/thread_info.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 3bf5bb5a34f9..b05519ca9e57 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -23,6 +23,9 @@ union ktime; #define FUTEX_TRYLOCK_PI 8 #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_REQUEUE_PI 12 +#define FUTEX_CMP_REQUEUE_PI 13 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -38,6 +41,11 @@ union ktime; #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#define FUTEX_REQUEUE_PI_PRIVATE (FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG) +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) /* * Support for robust futexes: the kernel cleans up held futexes at diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e6b820f8b56b..a8cc4e13434c 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -21,13 +21,14 @@ struct restart_block { struct { unsigned long arg0, arg1, arg2, arg3; }; - /* For futex_wait */ + /* For futex_wait and futex_wait_requeue_pi */ struct { u32 *uaddr; u32 val; u32 flags; u32 bitset; u64 time; + u32 *uaddr2; } futex; /* For nanosleep */ struct { -- cgit v1.2.3 From a2e87d06ddbe6e6fdb8d6d2e5e985efe4efb07dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:44:59 +0200 Subject: perf_counter: update mmap() counter read, take 2 Update the userspace read method. Paul noted that: - userspace cannot observe ->lock & 1 on the same cpu. - we need a barrier() between reading ->lock and ->index to ensure we read them in that prticular order. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.368446033@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f2b914de3f0c..e22ab47a2f41 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -170,22 +170,18 @@ struct perf_counter_mmap_page { * u32 seq; * s64 count; * - * again: - * seq = pc->lock; - * if (unlikely(seq & 1)) { - * cpu_relax(); - * goto again; - * } + * do { + * seq = pc->lock; * - * if (pc->index) { - * count = pmc_read(pc->index - 1); - * count += pc->offset; - * } else - * goto regular_read; + * barrier() + * if (pc->index) { + * count = pmc_read(pc->index - 1); + * count += pc->offset; + * } else + * goto regular_read; * - * barrier(); - * if (pc->lock != seq) - * goto again; + * barrier(); + * } while (pc->lock != seq); * * NOTE: for obvious reason this only works on self-monitoring * processes. -- cgit v1.2.3 From 9c03d88e328d5f28f13191622c2ea1349c36b799 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:00 +0200 Subject: perf_counter: add more context information Change the callchain context entries to u16, so as to gain some space. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.457320003@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e22ab47a2f41..f9d5cf0bfbdd 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -507,10 +507,10 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); -#define MAX_STACK_DEPTH 254 +#define MAX_STACK_DEPTH 255 struct perf_callchain_entry { - u32 nr, hv, kernel, user; + u16 nr, hv, kernel, user; u64 ip[MAX_STACK_DEPTH]; }; -- cgit v1.2.3 From 3c446b3d3b38f991f97e9d2df0ad26a60a94dcff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:01 +0200 Subject: perf_counter: SIGIO support Provide support for fcntl() I/O availability signals. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.579788800@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f9d5cf0bfbdd..8d5d11b8d011 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -238,6 +238,7 @@ enum perf_event_type { #include #include #include +#include #include struct task_struct; @@ -398,6 +399,7 @@ struct perf_counter { /* poll related */ wait_queue_head_t waitq; + struct fasync_struct *fasync; /* optional: for NMIs */ struct perf_wakeup_entry wakeup; -- cgit v1.2.3 From 671dec5daf3b3c43c5777be282f00120a44cf37f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:02 +0200 Subject: perf_counter: generalize pending infrastructure Prepare the pending infrastructure to do more than wakeups. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.634732847@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8d5d11b8d011..977fb15a53f3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -321,8 +321,9 @@ struct perf_mmap_data { void *data_pages[0]; }; -struct perf_wakeup_entry { - struct perf_wakeup_entry *next; +struct perf_pending_entry { + struct perf_pending_entry *next; + void (*func)(struct perf_pending_entry *); }; /** @@ -401,7 +402,7 @@ struct perf_counter { wait_queue_head_t waitq; struct fasync_struct *fasync; /* optional: for NMIs */ - struct perf_wakeup_entry wakeup; + struct perf_pending_entry pending; void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; -- cgit v1.2.3 From f6c7d5fe58b4846ee0cb4b98b6042489705eced4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:04 +0200 Subject: perf_counter: theres more to overflow than writing events Prepare for more generic overflow handling. The new perf_counter_overflow() method will handle the generic bits of the counter overflow, and can return a !0 return value, in which case the counter should be (soft) disabled, so that it won't count until it's properly disabled. XXX: do powerpc and swcounter Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.812109629@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 977fb15a53f3..ca2d4df29e0c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -491,8 +491,8 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_counter_context *ctx, int cpu); extern void perf_counter_update_userpage(struct perf_counter *counter); -extern void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs); +extern int perf_counter_overflow(struct perf_counter *counter, + int nmi, struct pt_regs *regs); /* * Return 1 for a software counter, 0 for a hardware counter */ -- cgit v1.2.3 From 339f7c90b8a2f3aa2dd4267e79f797999e8a3c59 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:06 +0200 Subject: perf_counter: PERF_RECORD_TIME By popular request, provide means to log a timestamp along with the counter overflow event. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.024173282@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index ca2d4df29e0c..928a7fae0961 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -102,6 +102,7 @@ enum perf_counter_record_format { PERF_RECORD_TID = 1U << 1, PERF_RECORD_GROUP = 1U << 2, PERF_RECORD_CALLCHAIN = 1U << 3, + PERF_RECORD_TIME = 1U << 4, }; /* @@ -221,6 +222,7 @@ enum perf_event_type { __PERF_EVENT_TID = PERF_RECORD_TID, __PERF_EVENT_GROUP = PERF_RECORD_GROUP, __PERF_EVENT_CALLCHAIN = PERF_RECORD_CALLCHAIN, + __PERF_EVENT_TIME = PERF_RECORD_TIME, }; #ifdef __KERNEL__ -- cgit v1.2.3 From 79f146415623fe74f39af67c0f6adc208939a410 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:07 +0200 Subject: perf_counter: counter overflow limit Provide means to auto-disable the counter after 'n' overflow events. Create the counter with hw_event.disabled = 1, and then issue an ioctl(fd, PREF_COUNTER_IOC_REFRESH, n); to set the limit and enable the counter. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.083139737@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 928a7fae0961..ef4dcbff75ab 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -155,8 +155,9 @@ struct perf_counter_hw_event { /* * Ioctls that can be done on a perf counter fd: */ -#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) +#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) +#define PERF_COUNTER_IOC_REFRESH _IOW('$', 2, u32) /* * Structure of the page that can be mapped via mmap @@ -403,9 +404,14 @@ struct perf_counter { /* poll related */ wait_queue_head_t waitq; struct fasync_struct *fasync; - /* optional: for NMIs */ + + /* delayed work for NMIs and such */ + int pending_wakeup; + int pending_disable; struct perf_pending_entry pending; + atomic_t event_limit; + void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; #endif -- cgit v1.2.3 From 0c593b3411341e3a05a61f5527df36ab02bd11e8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:08 +0200 Subject: perf_counter: comment the perf_event_type stuff Describe the event format. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.211174347@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index ef4dcbff75ab..81220188d058 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -208,6 +208,20 @@ struct perf_event_header { enum perf_event_type { + /* + * The MMAP events record the PROT_EXEC mappings so that we can + * correlate userspace IPs to code. They have the following structure: + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; + * }; + */ PERF_EVENT_MMAP = 1, PERF_EVENT_MUNMAP = 2, @@ -217,6 +231,24 @@ enum perf_event_type { * * These events will have types of the form: * PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } * + * + * struct { + * struct perf_event_header header; + * + * { u64 ip; } && __PERF_EVENT_IP + * { u32 pid, tid; } && __PERF_EVENT_TID + * + * { u64 nr; + * { u64 event, val; } cnt[nr]; } && __PERF_EVENT_GROUP + * + * { u16 nr, + * hv, + * kernel, + * user; + * u64 ips[nr]; } && __PERF_EVENT_CALLCHAIN + * + * { u64 time; } && __PERF_EVENT_TIME + * }; */ PERF_EVENT_COUNTER_OVERFLOW = 1UL << 31, __PERF_EVENT_IP = PERF_RECORD_IP, -- cgit v1.2.3 From 4c9e25428ff46b968a30f1dfafdba550cb6e4141 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:09 +0200 Subject: perf_counter: change event definition Currently the definition of an event is slightly ambiguous. We have wakeup events, for poll() and SIGIO, which are either generated when a record crosses a page boundary (hw_events.wakeup_events == 0), or every wakeup_events new records. Now a record can be either a counter overflow record, or a number of different things, like the mmap PROT_EXEC region notifications. Then there is the PERF_COUNTER_IOC_REFRESH event limit, which only considers counter overflows. This patch changes then wakeup_events and SIGIO notification to only consider overflow events. Furthermore it changes the SIGIO notification to report SIGHUP when the event limit is reached and the counter will be disabled. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.266679874@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 81220188d058..0f5a4005048f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -439,6 +439,7 @@ struct perf_counter { /* delayed work for NMIs and such */ int pending_wakeup; + int pending_kill; int pending_disable; struct perf_pending_entry pending; -- cgit v1.2.3 From 4af4998b8aa35600f4c4a4f3c3a23baca6081d02 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:10 +0200 Subject: perf_counter: rework context time Since perf_counter_context is switched along with tasks, we can maintain the context time without using the task runtime clock. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.353552838@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0f5a4005048f..7f5d353d78ac 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -477,14 +477,10 @@ struct perf_counter_context { struct task_struct *task; /* - * time_now is the current time in nanoseconds since an arbitrary - * point in the past. For per-task counters, this is based on the - * task clock, and for per-cpu counters it is based on the cpu clock. - * time_lost is an offset from the task/cpu clock, used to make it - * appear that time only passes while the context is scheduled in. + * Context clock, runs when context enabled. */ - u64 time_now; - u64 time_lost; + u64 time; + u64 timestamp; #endif }; -- cgit v1.2.3 From 849691a6cd40270ff5f4a8846d5f6bf8df663ffc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:12 +0200 Subject: perf_counter: remove rq->lock usage Now that all the task runtime clock users are gone, remove the ugly rq->lock usage from perf counters, which solves the nasty deadlock seen when a software task clock counter was read from an NMI overflow context. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.531137582@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index b6d2887a5d88..080d1fd461d7 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -85,8 +85,6 @@ static inline unsigned int kstat_irqs(unsigned int irq) /* * Lock/unlock the current runqueue - to extract task statistics: */ -extern void curr_rq_lock_irq_save(unsigned long *flags); -extern void curr_rq_unlock_irq_restore(unsigned long *flags); extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update); extern unsigned long long task_delta_exec(struct task_struct *); -- cgit v1.2.3 From a26b89f05d194413c7238e0bea071054f6b5d3c8 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:34 +0200 Subject: sched, hw-branch-tracer: add wait_task_context_switch() function to sched.h Add a function to wait until some other task has been switched out at least once. This differs from wait_task_inactive() subtly, in that the latter will wait until the task has left the CPU. Signed-off-by: Markus Metzger Cc: markus.t.metzger@gmail.com Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144549.794157000@intel.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b94f3541f67b..a5b9a83065fa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1993,8 +1993,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP +extern void wait_task_context_switch(struct task_struct *p); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else +static inline void wait_task_context_switch(struct task_struct *p) {} static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { -- cgit v1.2.3 From e2b371f00a6f529f6362654239bdec8dcd510760 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:35 +0200 Subject: mm, x86, ptrace, bts: defer branch trace stopping When a ptraced task is unlinked, we need to stop branch tracing for that task. Since the unlink is called with interrupts disabled, and we need interrupts enabled to stop branch tracing, we defer the work. Collect all branch tracing related stuff in a branch tracing context. Reviewed-by: Oleg Nesterov Signed-off-by: Markus Metzger Cc: Andrew Morton Cc: Peter Zijlstra Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144550.712401000@intel.com> Signed-off-by: Ingo Molnar --- include/linux/mm.h | 3 ++- include/linux/sched.h | 9 ++------- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index bff1f0d475c7..64d8ed2538ae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -13,6 +13,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1321,6 +1322,6 @@ void vmemmap_populate_print_last(void); extern void *alloc_locked_buffer(size_t size); extern void free_locked_buffer(void *buffer, size_t size); -extern void release_locked_buffer(void *buffer, size_t size); +extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a5b9a83065fa..52b8cd049c2e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -96,8 +96,8 @@ struct exec_domain; struct futex_pi_state; struct robust_list_head; struct bio; -struct bts_tracer; struct fs_struct; +struct bts_context; /* * List of flags we want to share for kernel threads, @@ -1210,12 +1210,7 @@ struct task_struct { * This is the tracer handle for the ptrace BTS extension. * This field actually belongs to the ptracer task. */ - struct bts_tracer *bts; - /* - * The buffer to hold the BTS data. - */ - void *bts_buffer; - size_t bts_size; + struct bts_context *bts; #endif /* CONFIG_X86_PTRACE_BTS */ /* PID/PID hash table linkage. */ -- cgit v1.2.3 From 0f4814065ff8c24ca8bfd75c9b73502be152c287 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:48 +0200 Subject: x86, ptrace: add bts context unconditionally Add the ptrace bts context field to task_struct unconditionally. Initialize the field directly in copy_process(). Remove all the unneeded functionality used to initialize that field. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144603.292754000@intel.com> Signed-off-by: Ingo Molnar --- include/linux/ptrace.h | 10 ---------- include/linux/sched.h | 2 -- 2 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 67c15653fc23..59e133d39d50 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer); -extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 /* Returns 0 on success, -errno on denial. */ @@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task) #define arch_ptrace_untrace(task) do { } while (0) #endif -#ifndef arch_ptrace_fork -/* - * Do machine-specific work to initialize a new task. - * - * This is called from copy_process(). - */ -#define arch_ptrace_fork(child, clone_flags) do { } while (0) -#endif - extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); diff --git a/include/linux/sched.h b/include/linux/sched.h index 52b8cd049c2e..451186a22ef5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1205,13 +1205,11 @@ struct task_struct { struct list_head ptraced; struct list_head ptrace_entry; -#ifdef CONFIG_X86_PTRACE_BTS /* * This is the tracer handle for the ptrace BTS extension. * This field actually belongs to the ptracer task. */ struct bts_context *bts; -#endif /* CONFIG_X86_PTRACE_BTS */ /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; -- cgit v1.2.3 From 06f409d76f1d382167eb1cadde2e23a73272865d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 7 Apr 2009 18:10:13 +0100 Subject: ASoC: Provide core support for symmetric sample rates Many devices require symmetric configurations of capture and playback data formats, often due to shared clocking but sometimes also due to other shared playback and record configuration in the device. Start providing core support for this by allowing the DAIs or the machine to specify that the sample rates used should be kept symmetric. A flag symmetric_rates is provided in the snd_soc_dai and snd_soc_dai_link structures. If this is set in either of the DAIs or in the machine then a constraint will be applied when a stream is already open preventing any changes in sample rate. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 1 + include/sound/soc.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 13676472ddfc..22b729fbbf84 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -208,6 +208,7 @@ struct snd_soc_dai { /* DAI capabilities */ struct snd_soc_pcm_stream capture; struct snd_soc_pcm_stream playback; + unsigned int symmetric_rates:1; /* DAI runtime info */ struct snd_pcm_runtime *runtime; diff --git a/include/sound/soc.h b/include/sound/soc.h index a40bc6f316fc..b1f2f8819fea 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -417,6 +417,12 @@ struct snd_soc_dai_link { /* codec/machine specific init - e.g. add machine controls */ int (*init)(struct snd_soc_codec *codec); + /* Symmetry requirements */ + unsigned int symmetric_rates:1; + + /* Symmetry data - only valid if symmetry is being enforced */ + unsigned int rate; + /* DAI pcm */ struct snd_pcm *pcm; }; -- cgit v1.2.3 From 44bc9dc729e33a4ec6ebed4d0b6c08e8d20b42cf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 10:47:17 +0200 Subject: mm, x86, ptrace, bts: defer branch trace stopping, cleanup Andrew Morton noticed that mm.h needlessly includes sched.h - remove it. Reported-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 64d8ed2538ae..776b641f37e3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -13,7 +13,6 @@ #include #include #include -#include struct mempolicy; struct anon_vma; -- cgit v1.2.3 From a34b50ddc265bae058c66661b096ef6384c5a8b1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 10:56:54 +0200 Subject: mm, x86, ptrace, bts: defer branch trace stopping, remove dead code Remove the unused free_locked_buffer() API. Signed-off-by: Ingo Molnar --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 776b641f37e3..a3963ba23a6d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1320,7 +1320,6 @@ int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); extern void *alloc_locked_buffer(size_t size); -extern void free_locked_buffer(void *buffer, size_t size); extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 42d7c5e353cef9062129b0de3ec9ddf10567b9ca Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 8 Apr 2009 09:09:21 -0500 Subject: swiotlb: change swiotlb_bus_to[phys,virt] prototypes Add a hwdev argument that is needed on some architectures in order to access a per-device offset that is taken into account when producing a physical address (also needed to get from bus address to virtual address because the physical address is an intermediate step). Also make swiotlb_bus_to_virt weak so architectures can override it. Signed-off-by: Becky Bruce Acked-by: FUJITA Tomonori Signed-off-by: Kumar Gala Cc: jeremy@goop.org Cc: ian.campbell@citrix.com LKML-Reference: <1239199761-22886-8-git-send-email-galak@kernel.crashing.org> Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ac9ff54f7cb3..cb1a6631b8f4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t address); -extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); +extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, + dma_addr_t address); extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); -- cgit v1.2.3 From 6fab01927e8bdbbc77bafba2abb4810c5591ad52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:26 +0200 Subject: perf_counter: provide misc bits in the event header Limit the size of each record to 64k (or should we count in multiples of u64 and have a 512K limit?), this gives 16 bits or spare room in the header, which we can use for misc bits, so as to not have to grow the record with u64 every time we have a few bits to report. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130408.769271806@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7f5d353d78ac..5bd8817b12d4 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -201,9 +201,13 @@ struct perf_counter_mmap_page { __u32 data_head; /* head in the data section */ }; +#define PERF_EVENT_MISC_KERNEL (1 << 0) +#define PERF_EVENT_MISC_USER (1 << 1) + struct perf_event_header { __u32 type; - __u32 size; + __u16 misc; + __u16 size; }; enum perf_event_type { -- cgit v1.2.3 From 6b6e5486b3a168f0328c82a8d4376caf901472b1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:27 +0200 Subject: perf_counter: use misc field to widen type Push the PERF_EVENT_COUNTER_OVERFLOW bit into the misc field so that we can have the full 32bit for PERF_RECORD_ bits. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130408.891867663@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5bd8817b12d4..4809ae18a940 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -201,8 +201,9 @@ struct perf_counter_mmap_page { __u32 data_head; /* head in the data section */ }; -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (1 << 1) +#define PERF_EVENT_MISC_KERNEL (1 << 0) +#define PERF_EVENT_MISC_USER (1 << 1) +#define PERF_EVENT_MISC_OVERFLOW (1 << 2) struct perf_event_header { __u32 type; @@ -230,36 +231,27 @@ enum perf_event_type { PERF_EVENT_MUNMAP = 2, /* - * Half the event type space is reserved for the counter overflow - * bitfields, as found in hw_event.record_type. - * - * These events will have types of the form: - * PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } * + * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field + * will be PERF_RECORD_* * * struct { * struct perf_event_header header; * - * { u64 ip; } && __PERF_EVENT_IP - * { u32 pid, tid; } && __PERF_EVENT_TID + * { u64 ip; } && PERF_RECORD_IP + * { u32 pid, tid; } && PERF_RECORD_TID * * { u64 nr; - * { u64 event, val; } cnt[nr]; } && __PERF_EVENT_GROUP + * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP * * { u16 nr, * hv, * kernel, * user; - * u64 ips[nr]; } && __PERF_EVENT_CALLCHAIN + * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN * - * { u64 time; } && __PERF_EVENT_TIME + * { u64 time; } && PERF_RECORD_TIME * }; */ - PERF_EVENT_COUNTER_OVERFLOW = 1UL << 31, - __PERF_EVENT_IP = PERF_RECORD_IP, - __PERF_EVENT_TID = PERF_RECORD_TID, - __PERF_EVENT_GROUP = PERF_RECORD_GROUP, - __PERF_EVENT_CALLCHAIN = PERF_RECORD_CALLCHAIN, - __PERF_EVENT_TIME = PERF_RECORD_TIME, }; #ifdef __KERNEL__ -- cgit v1.2.3 From 8740f9418c78dcad694b46ab25d1645d5aef1f5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:29 +0200 Subject: perf_counter: add some comments Add a few comments because I was forgetting what field what for what functionality. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.036984214@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4809ae18a940..8bf764fc6220 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -344,10 +344,12 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; - int nr_pages; - atomic_t wakeup; - atomic_t head; - atomic_t events; + int nr_pages; /* nr of data pages */ + + atomic_t wakeup; /* POLL_ for wakeups */ + atomic_t head; /* write position */ + atomic_t events; /* event limit */ + struct perf_counter_mmap_page *user_page; void *data_pages[0]; }; -- cgit v1.2.3 From 8d1b2d9361b494bfc761700c348c65ebbe3deb5b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:30 +0200 Subject: perf_counter: track task-comm data Similar to the mmap data stream, add one that tracks the task COMM field, so that the userspace reporting knows what to call a task. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.127422406@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8bf764fc6220..a70a55f27598 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -142,8 +142,9 @@ struct perf_counter_hw_event { exclude_idle : 1, /* don't count when idle */ mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ + comm : 1, /* include comm data */ - __reserved_1 : 53; + __reserved_1 : 52; __u32 extra_config_len; __u32 wakeup_events; /* wakeup every n events */ @@ -230,6 +231,16 @@ enum perf_event_type { PERF_EVENT_MMAP = 1, PERF_EVENT_MUNMAP = 2, + /* + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * char comm[]; + * }; + */ + PERF_EVENT_COMM = 3, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* @@ -545,6 +556,8 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); +extern void perf_counter_comm(struct task_struct *tsk); + #define MAX_STACK_DEPTH 255 struct perf_callchain_entry { @@ -583,6 +596,7 @@ static inline void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file) { } +static inline void perf_counter_comm(struct task_struct *tsk) { } #endif #endif /* __KERNEL__ */ -- cgit v1.2.3 From 4d855457d84b819fefcd1cd1b0a2a0a0ec475c07 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:32 +0200 Subject: perf_counter: move PERF_RECORD_TIME Move PERF_RECORD_TIME so that all the fixed length items come before the variable length ones. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.307926436@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a70a55f27598..8bd1be58c938 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -100,9 +100,9 @@ enum sw_event_ids { enum perf_counter_record_format { PERF_RECORD_IP = 1U << 0, PERF_RECORD_TID = 1U << 1, - PERF_RECORD_GROUP = 1U << 2, - PERF_RECORD_CALLCHAIN = 1U << 3, - PERF_RECORD_TIME = 1U << 4, + PERF_RECORD_TIME = 1U << 2, + PERF_RECORD_GROUP = 1U << 3, + PERF_RECORD_CALLCHAIN = 1U << 4, }; /* @@ -250,6 +250,7 @@ enum perf_event_type { * * { u64 ip; } && PERF_RECORD_IP * { u32 pid, tid; } && PERF_RECORD_TID + * { u64 time; } && PERF_RECORD_TIME * * { u64 nr; * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP @@ -259,8 +260,6 @@ enum perf_event_type { * kernel, * user; * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN - * - * { u64 time; } && PERF_RECORD_TIME * }; */ }; -- cgit v1.2.3 From 78f13e9525ba777da25c4ddab89f28e9366a8b7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:33 +0200 Subject: perf_counter: allow for data addresses to be recorded Paul suggested we allow for data addresses to be recorded along with the traditional IPs as power can provide these. For now, only the software pagefault events provide data addresses, but in the future power might as well for some events. x86 doesn't seem capable of providing this atm. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.394816925@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8bd1be58c938..c22363a4f746 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -101,8 +101,9 @@ enum perf_counter_record_format { PERF_RECORD_IP = 1U << 0, PERF_RECORD_TID = 1U << 1, PERF_RECORD_TIME = 1U << 2, - PERF_RECORD_GROUP = 1U << 3, - PERF_RECORD_CALLCHAIN = 1U << 4, + PERF_RECORD_ADDR = 1U << 3, + PERF_RECORD_GROUP = 1U << 4, + PERF_RECORD_CALLCHAIN = 1U << 5, }; /* @@ -251,6 +252,7 @@ enum perf_event_type { * { u64 ip; } && PERF_RECORD_IP * { u32 pid, tid; } && PERF_RECORD_TID * { u64 time; } && PERF_RECORD_TIME + * { u64 addr; } && PERF_RECORD_ADDR * * { u64 nr; * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP @@ -537,7 +539,7 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, extern void perf_counter_update_userpage(struct perf_counter *counter); extern int perf_counter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs); + int nmi, struct pt_regs *regs, u64 addr); /* * Return 1 for a software counter, 0 for a hardware counter */ @@ -547,7 +549,7 @@ static inline int is_software_counter(struct perf_counter *counter) perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; } -extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); +extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); extern void perf_counter_mmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); @@ -584,8 +586,8 @@ static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } static inline void -perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } - +perf_swcounter_event(u32 event, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) { } static inline void perf_counter_mmap(unsigned long addr, unsigned long len, -- cgit v1.2.3 From 1c1452be2e9ae282a7316c3b23987811bd7acda6 Mon Sep 17 00:00:00 2001 From: Jonas Larsson Date: Tue, 31 Mar 2009 11:16:48 +0200 Subject: atmel-mci: Add support for inverted detect pin Same patch as before, modified to use bool. Also adds description of the new field in struct atmel_mci that I missed in the first patch. This patch adds Atmel MCI support for inverted detect pins. Signed-off-by: Jonas Larsson Acked-by: Pierre Ossman Signed-off-by: Haavard Skinnemoen --- include/linux/atmel-mci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h index 2f1f95737acb..57b1846a3c87 100644 --- a/include/linux/atmel-mci.h +++ b/include/linux/atmel-mci.h @@ -10,6 +10,7 @@ * @bus_width: Number of data lines wired up the slot * @detect_pin: GPIO pin wired to the card detect switch * @wp_pin: GPIO pin wired to the write protect sensor + * @detect_is_active_high: The state of the detect pin when it is active * * If a given slot is not present on the board, @bus_width should be * set to 0. The other fields are ignored in this case. @@ -24,6 +25,7 @@ struct mci_slot_pdata { unsigned int bus_width; int detect_pin; int wp_pin; + bool detect_is_active_high; }; /** -- cgit v1.2.3 From 1ccd15497869f3ed83b5225d410df53a96e52757 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Apr 2009 10:53:45 +0200 Subject: perf_counter: sysctl for system wide perf counters Impact: add sysctl for paranoid/relaxed perfcounters policy Allow the use of system wide perf counters to everybody, but provide a sysctl to disable it for the paranoid security minded. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090409085524.514046352@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c22363a4f746..981432885301 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -568,6 +568,8 @@ struct perf_callchain_entry { extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +extern int sysctl_perf_counter_priv; + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } -- cgit v1.2.3 From 2062501ae6505dbc5bff3a792246c2661d114050 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 6 Apr 2009 01:49:33 +0200 Subject: tracing/lockdep: report the time waited for a lock While trying to optimize the new lock on reiserfs to replace the bkl, I find the lock tracing very useful though it lacks something important for performance (and latency) instrumentation: the time a task waits for a lock. That's what this patch implements: bash-4816 [000] 202.652815: lock_contended: lock_contended: &sb->s_type->i_mutex_key bash-4816 [000] 202.652819: lock_acquired: &rq->lock (0.000 us) <...>-4787 [000] 202.652825: lock_acquired: &rq->lock (0.000 us) <...>-4787 [000] 202.652829: lock_acquired: &rq->lock (0.000 us) bash-4816 [000] 202.652833: lock_acquired: &sb->s_type->i_mutex_key (16.005 us) As shown above, the "lock acquired" field is followed by the time it has been waiting for the lock. Usually, a lock contended entry is followed by a near lock_acquired entry with a non-zero time waited. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Steven Rostedt LKML-Reference: <1238975373-15739-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/lockdep_event_types.h | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h index adccfcd2ec8f..863f1e4583a6 100644 --- a/include/trace/lockdep_event_types.h +++ b/include/trace/lockdep_event_types.h @@ -32,11 +32,24 @@ TRACE_FORMAT(lock_contended, TP_FMT("%s", lock->name) ); -TRACE_FORMAT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __field(const char *, name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __entry->name = lock->name; + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + __entry->wait_nsec_rem) +); #endif #endif -- cgit v1.2.3 From 5cb3d1d9d34ac04bcaa2034139345b2a5fea54c1 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Thu, 9 Apr 2009 14:08:18 +0800 Subject: tracing, net, skb tracepoint: make skb tracepoint use the TRACE_EVENT() macro TRACE_EVENT is a more generic way to define a tracepoint. Doing so adds these new capabilities to this tracepoint: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions Signed-off-by: Zhao Lei Acked-by: Neil Horman Cc: "David S. Miller" Cc: Arnaldo Carvalho de Melo Cc: "Steven Rostedt ;" Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49DD90D2.5020604@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/skb.h | 4 +--- include/trace/skb_event_types.h | 38 ++++++++++++++++++++++++++++++++++++++ include/trace/trace_event_types.h | 1 + include/trace/trace_events.h | 1 + 4 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 include/trace/skb_event_types.h (limited to 'include') diff --git a/include/trace/skb.h b/include/trace/skb.h index b66206d9be72..d2de7174a6e8 100644 --- a/include/trace/skb.h +++ b/include/trace/skb.h @@ -4,8 +4,6 @@ #include #include -DECLARE_TRACE(kfree_skb, - TP_PROTO(struct sk_buff *skb, void *location), - TP_ARGS(skb, location)); +#include #endif diff --git a/include/trace/skb_event_types.h b/include/trace/skb_event_types.h new file mode 100644 index 000000000000..4a1c504c0e16 --- /dev/null +++ b/include/trace/skb_event_types.h @@ -0,0 +1,38 @@ + +/* use instead */ +#ifndef TRACE_EVENT +# error Do not include this file directly. +# error Unless you know what you are doing. +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb + +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#undef TRACE_SYSTEM diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h index df56f5694be6..33b6bfcba93b 100644 --- a/include/trace/trace_event_types.h +++ b/include/trace/trace_event_types.h @@ -3,3 +3,4 @@ #include #include #include +#include diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index fd13750ca4ba..0e2aa80076d9 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -3,3 +3,4 @@ #include #include #include +#include -- cgit v1.2.3 From 02af61bb50f5d5f0322dbe5ab2a0d75808d25c7b Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 10 Apr 2009 14:26:18 +0800 Subject: tracing, kmemtrace: Separate include/trace/kmemtrace.h to kmemtrace part and tracepoint part Impact: refactor code for future changes Current kmemtrace.h is used both as header file of kmemtrace and kmem's tracepoints definition. Tracepoints' definition file may be used by other code, and should only have definition of tracepoint. We can separate include/trace/kmemtrace.h into 2 files: include/linux/kmemtrace.h: header file for kmemtrace include/trace/kmem.h: definition of kmem tracepoints Signed-off-by: Zhao Lei Acked-by: Eduard - Gabriel Munteanu Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49DEE68A.5040902@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/kmemtrace.h | 25 +++++++++++++++++++ include/linux/slab_def.h | 2 +- include/linux/slub_def.h | 2 +- include/trace/kmem.h | 44 +++++++++++++++++++++++++++++++++ include/trace/kmemtrace.h | 63 ----------------------------------------------- 5 files changed, 71 insertions(+), 65 deletions(-) create mode 100644 include/linux/kmemtrace.h create mode 100644 include/trace/kmem.h delete mode 100644 include/trace/kmemtrace.h (limited to 'include') diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h new file mode 100644 index 000000000000..15c45a27a925 --- /dev/null +++ b/include/linux/kmemtrace.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * + * This file is released under GPL version 2. + */ + +#ifndef _LINUX_KMEMTRACE_H +#define _LINUX_KMEMTRACE_H + +#ifdef __KERNEL__ + +#include + +#ifdef CONFIG_KMEMTRACE +extern void kmemtrace_init(void); +#else +static inline void kmemtrace_init(void) +{ +} +#endif + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_KMEMTRACE_H */ + diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 5ac9b0bcaf9a..713f841ecaa9 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,7 +14,7 @@ #include /* kmalloc_sizes.h needs PAGE_SIZE */ #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include -#include +#include /* Size description struct for general caches. */ struct cache_sizes { diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 5046f90c1171..be5d40c43bd2 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ diff --git a/include/trace/kmem.h b/include/trace/kmem.h new file mode 100644 index 000000000000..24d251928182 --- /dev/null +++ b/include/trace/kmem.h @@ -0,0 +1,44 @@ +#ifndef _TRACE_KMEM_H +#define _TRACE_KMEM_H + +#include +#include + +DECLARE_TRACE(kmalloc, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); +DECLARE_TRACE(kmem_cache_alloc, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); +DECLARE_TRACE(kmalloc_node, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); +DECLARE_TRACE(kmem_cache_alloc_node, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); +DECLARE_TRACE(kfree, + TP_PROTO(unsigned long call_site, const void *ptr), + TP_ARGS(call_site, ptr)); +DECLARE_TRACE(kmem_cache_free, + TP_PROTO(unsigned long call_site, const void *ptr), + TP_ARGS(call_site, ptr)); + +#endif /* _TRACE_KMEM_H */ diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h deleted file mode 100644 index 28ee69f9cd46..000000000000 --- a/include/trace/kmemtrace.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2008 Eduard - Gabriel Munteanu - * - * This file is released under GPL version 2. - */ - -#ifndef _LINUX_KMEMTRACE_H -#define _LINUX_KMEMTRACE_H - -#ifdef __KERNEL__ - -#include -#include - -#ifdef CONFIG_KMEMTRACE -extern void kmemtrace_init(void); -#else -static inline void kmemtrace_init(void) -{ -} -#endif - -DECLARE_TRACE(kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmem_cache_alloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kmem_cache_alloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kfree, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); -DECLARE_TRACE(kmem_cache_free, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_KMEMTRACE_H */ - -- cgit v1.2.3 From fc182a4330fc22ea1b68fa3d5064dd85a73a4c4a Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 10 Apr 2009 14:27:38 +0800 Subject: tracing, kmemtrace: Make kmem tracepoints use TRACE_EVENT macro TRACE_EVENT is a more generic way to define tracepoints. Doing so adds these new capabilities to this tracepoint: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions Signed-off-by: Zhao Lei Acked-by: Eduard - Gabriel Munteanu Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49DEE6DA.80600@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/kmem.h | 39 +------- include/trace/kmem_event_types.h | 193 ++++++++++++++++++++++++++++++++++++++ include/trace/trace_event_types.h | 1 + include/trace/trace_events.h | 1 + 4 files changed, 197 insertions(+), 37 deletions(-) create mode 100644 include/trace/kmem_event_types.h (limited to 'include') diff --git a/include/trace/kmem.h b/include/trace/kmem.h index 24d251928182..46efc2423f03 100644 --- a/include/trace/kmem.h +++ b/include/trace/kmem.h @@ -1,44 +1,9 @@ #ifndef _TRACE_KMEM_H #define _TRACE_KMEM_H -#include #include +#include -DECLARE_TRACE(kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmem_cache_alloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kmem_cache_alloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kfree, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); -DECLARE_TRACE(kmem_cache_free, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); +#include #endif /* _TRACE_KMEM_H */ diff --git a/include/trace/kmem_event_types.h b/include/trace/kmem_event_types.h new file mode 100644 index 000000000000..4ff420fe4675 --- /dev/null +++ b/include/trace/kmem_event_types.h @@ -0,0 +1,193 @@ + +/* use instead */ +#ifndef TRACE_EVENT +# error Do not include this file directly. +# error Unless you know what you are doing. +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem + +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +#undef TRACE_SYSTEM diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h index 33b6bfcba93b..552a50e169a6 100644 --- a/include/trace/trace_event_types.h +++ b/include/trace/trace_event_types.h @@ -4,3 +4,4 @@ #include #include #include +#include diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 0e2aa80076d9..13d6b85668cf 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -4,3 +4,4 @@ #include #include #include +#include -- cgit v1.2.3 From f6d655a6e6974e474a11b25052c29d10b80814b3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Apr 2009 11:27:03 +0100 Subject: ASoC: Support DAPM events for DACs and ADCs Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index a7def6a9a030..fcc929da0339 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -140,9 +140,19 @@ #define SND_SOC_DAPM_DAC(wname, stname, wreg, wshift, winvert) \ { .id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \ .shift = wshift, .invert = winvert} +#define SND_SOC_DAPM_DAC_E(wname, stname, wreg, wshift, winvert, \ + wevent, wflags) \ +{ .id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \ + .shift = wshift, .invert = winvert, \ + .event = wevent, .event_flags = wflags} #define SND_SOC_DAPM_ADC(wname, stname, wreg, wshift, winvert) \ { .id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \ .shift = wshift, .invert = winvert} +#define SND_SOC_DAPM_ADC_E(wname, stname, wreg, wshift, winvert, \ + wevent, wflags) \ +{ .id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \ + .shift = wshift, .invert = winvert, \ + .event = wevent, .event_flags = wflags} /* generic register modifier widget */ #define SND_SOC_DAPM_REG(wid, wname, wreg, wshift, wmask, won_val, woff_val) \ -- cgit v1.2.3 From fa1b47dd85453ec7d4bcfe4aa4a2d172ba452fc3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 2 Apr 2009 00:09:41 -0400 Subject: ring-buffer: add ring_buffer_discard_commit The ring_buffer_discard_commit is similar to ring_buffer_event_discard but it can only be done on an event that has yet to be commited. Unpredictable results can happen otherwise. The main difference between ring_buffer_discard_commit and ring_buffer_event_discard is that ring_buffer_discard_commit will try to free the data in the ring buffer if nothing has addded data after the reserved event. If something did, then it acts almost the same as ring_buffer_event_discard followed by a ring_buffer_unlock_commit. Note, either ring_buffer_commit_discard and ring_buffer_unlock_commit can be called on an event, not both. This commit also exports both discard functions to be usable by GPL modules. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e1b7b2173885..f0aa486d131c 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -68,8 +68,37 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } +/* + * ring_buffer_event_discard can discard any event in the ring buffer. + * it is up to the caller to protect against a reader from + * consuming it or a writer from wrapping and replacing it. + * + * No external protection is needed if this is called before + * the event is commited. But in that case it would be better to + * use ring_buffer_discard_commit. + * + * Note, if an event that has not been committed is discarded + * with ring_buffer_event_discard, it must still be committed. + */ void ring_buffer_event_discard(struct ring_buffer_event *event); +/* + * ring_buffer_discard_commit will remove an event that has not + * ben committed yet. If this is used, then ring_buffer_unlock_commit + * must not be called on the discarded event. This function + * will try to remove the event from the ring buffer completely + * if another event has not been written after it. + * + * Example use: + * + * if (some_condition) + * ring_buffer_discard_commit(buffer, event); + * else + * ring_buffer_unlock_commit(buffer, event); + */ +void ring_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event); + /* * size is in bytes for each per CPU buffer. */ -- cgit v1.2.3 From 5f77a88b3f8268b11940b51d2e03d26a663ceb90 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 8 Apr 2009 03:14:01 -0500 Subject: tracing/infrastructure: separate event tracer from event support Add a new config option, CONFIG_EVENT_TRACING that gets selected when CONFIG_TRACING is selected and adds everything needed by the stuff in trace_export - basically all the event tracing support needed by e.g. bprint, minus the actual events, which are only included if CONFIG_EVENT_TRACER is selected. So CONFIG_EVENT_TRACER can be used to turn on or off the generated events (what I think of as the 'event tracer'), while CONFIG_EVENT_TRACING turns on or off the base event tracing support used by both the event tracer and the other things such as bprint that can't be configured out. Signed-off-by: Tom Zanussi Cc: Steven Rostedt Cc: fweisbec@gmail.com LKML-Reference: <1239178441.10295.34.camel@tropicana> Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7fa660fd449c..7e9b1e9f711c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -61,7 +61,7 @@ #define BRANCH_PROFILE() #endif -#ifdef CONFIG_EVENT_TRACER +#ifdef CONFIG_EVENT_TRACING #define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \ *(_ftrace_events) \ VMLINUX_SYMBOL(__stop_ftrace_events) = .; -- cgit v1.2.3 From 6e837fb152410e571a81aaadbd9884f0bc46a55e Mon Sep 17 00:00:00 2001 From: Etienne Basset Date: Wed, 8 Apr 2009 20:39:40 +0200 Subject: smack: implement logging V3 This patch creates auditing functions usable by LSM to audit security events. It provides standard dumping of FS, NET, task etc ... events (code borrowed from SELinux) and provides 2 callbacks to define LSM specific auditing, which should be flexible enough to convert SELinux too. Signed-off-by: Etienne Basset Acked-by: Casey Schaufler cked-by: Eric Paris Signed-off-by: James Morris --- include/linux/lsm_audit.h | 111 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 include/linux/lsm_audit.h (limited to 'include') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h new file mode 100644 index 000000000000..e461b2c3d711 --- /dev/null +++ b/include/linux/lsm_audit.h @@ -0,0 +1,111 @@ +/* + * Common LSM logging functions + * Heavily borrowed from selinux/avc.h + * + * Author : Etienne BASSET + * + * All credits to : Stephen Smalley, + * All BUGS to : Etienne BASSET + */ +#ifndef _LSM_COMMON_LOGGING_ +#define _LSM_COMMON_LOGGING_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Auxiliary data to use in generating the audit record. */ +struct common_audit_data { + char type; +#define LSM_AUDIT_DATA_FS 1 +#define LSM_AUDIT_DATA_NET 2 +#define LSM_AUDIT_DATA_CAP 3 +#define LSM_AUDIT_DATA_IPC 4 +#define LSM_AUDIT_DATA_TASK 5 +#define LSM_AUDIT_DATA_KEY 6 + struct task_struct *tsk; + union { + struct { + struct path path; + struct inode *inode; + } fs; + struct { + int netif; + struct sock *sk; + u16 family; + __be16 dport; + __be16 sport; + union { + struct { + __be32 daddr; + __be32 saddr; + } v4; + struct { + struct in6_addr daddr; + struct in6_addr saddr; + } v6; + } fam; + } net; + int cap; + int ipc_id; + struct task_struct *tsk; +#ifdef CONFIG_KEYS + struct { + key_serial_t key; + char *key_desc; + } key_struct; +#endif + } u; + const char *function; + /* this union contains LSM specific data */ + union { + /* SMACK data */ + struct smack_audit_data { + char *subject; + char *object; + char *request; + int result; + } smack_audit_data; + /* SELinux data */ + struct { + u32 ssid; + u32 tsid; + u16 tclass; + u32 requested; + u32 audited; + struct av_decision *avd; + int result; + } selinux_audit_data; + } lsm_priv; + /* these callback will be implemented by a specific LSM */ + void (*lsm_pre_audit)(struct audit_buffer *, void *); + void (*lsm_post_audit)(struct audit_buffer *, void *); +}; + +#define v4info fam.v4 +#define v6info fam.v6 + +int ipv4_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto); + +int ipv6_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto); + +/* Initialize an LSM audit data structure. */ +#define COMMON_AUDIT_DATA_INIT(_d, _t) \ + { memset((_d), 0, sizeof(struct common_audit_data)); \ + (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; } + +void common_lsm_audit(struct common_audit_data *a); + +#endif -- cgit v1.2.3 From 7ba5c840e64d4a967379f1ae3eca73278180b11d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Apr 2009 21:31:17 -0700 Subject: rcu: Add __rcu_pending tracing to hierarchical RCU Add tracing to __rcu_pending() to provide information on why RCU processing was kicked off. This is helpful for debugging hierarchical RCU, and might also be helpful in learning how hierarchical RCU operates. Located-by: Anton Blanchard Tested-by: Anton Blanchard Signed-off-by: Paul E. McKenney Cc: anton@samba.org Cc: akpm@linux-foundation.org Cc: dipankar@in.ibm.com Cc: manfred@colorfullife.com Cc: cl@linux-foundation.org Cc: josht@linux.vnet.ibm.com Cc: schamp@sgi.com Cc: niv@us.ibm.com Cc: dvhltc@us.ibm.com Cc: ego@in.ibm.com Cc: laijs@cn.fujitsu.com Cc: rostedt@goodmis.org Cc: peterz@infradead.org Cc: penberg@cs.helsinki.fi Cc: andi@firstfloor.org Cc: "Paul E. McKenney" LKML-Reference: <1239683479943-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutree.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 58b2aa5312b9..5a5153806c42 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -161,8 +161,15 @@ struct rcu_data { unsigned long offline_fqs; /* Kicked due to being offline. */ unsigned long resched_ipi; /* Sent a resched IPI. */ - /* 5) For future __rcu_pending statistics. */ + /* 5) __rcu_pending() statistics. */ long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rp_qs_pending; + long n_rp_cb_ready; + long n_rp_cpu_needs_gp; + long n_rp_gp_completed; + long n_rp_gp_started; + long n_rp_need_fqs; + long n_rp_need_nothing; int cpu; }; -- cgit v1.2.3 From 02bec490450836ebbd628e97ec03f10b57def8ce Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Tue, 24 Mar 2009 12:24:35 +0100 Subject: ALSA: lx6464es - driver for the digigram lx6464es interface prototype of a driver for the digigram lx6464es 64 channel ethersound interface. Signed-off-by: Tim Blechmann Signed-off-by: Takashi Iwai --- include/linux/pci_ids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ee98cd570885..2b1a69598e74 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1005,6 +1005,7 @@ #define PCI_DEVICE_ID_PLX_PCI200SYN 0x3196 #define PCI_DEVICE_ID_PLX_9030 0x9030 #define PCI_DEVICE_ID_PLX_9050 0x9050 +#define PCI_DEVICE_ID_PLX_9056 0x9056 #define PCI_DEVICE_ID_PLX_9080 0x9080 #define PCI_DEVICE_ID_PLX_GTEK_SERIAL2 0xa001 @@ -1847,6 +1848,10 @@ #define PCI_SUBDEVICE_ID_HYPERCOPE_METRO 0x0107 #define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2 0x0108 +#define PCI_VENDOR_ID_DIGIGRAM 0x1369 +#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM 0xc001 +#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM 0xc002 + #define PCI_VENDOR_ID_KAWASAKI 0x136b #define PCI_DEVICE_ID_MCHIP_KL5A72002 0xff01 -- cgit v1.2.3 From ea20d9293ce423a39717ed4375393129a2e701f9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 08:54:16 -0400 Subject: tracing: consolidate trace and trace_event headers Impact: clean up Neil Horman (et. al.) criticized the way the trace events were broken up into two files. The reason for that was that ftrace needed to separate out the declarations from where the #include was used. It then dawned on me that the tracepoint.h header only needs to define the TRACE_EVENT macro if it is not already defined. The solution is simply to test if TRACE_EVENT is defined, and if it is not then the linux/tracepoint.h header can define it. This change consolidates all the .h and _event_types.h into the .h file. Reported-by: Neil Horman Reported-by: Theodore Tso Reported-by: Jiaying Zhang Cc: Zhaolei Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Jason Baron Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 9 +- include/trace/irq.h | 51 +++++- include/trace/irq_event_types.h | 55 ------ include/trace/kmem.h | 189 +++++++++++++++++++- include/trace/lockdep.h | 52 +++++- include/trace/lockdep_event_types.h | 57 ------ include/trace/sched.h | 333 ++++++++++++++++++++++++++++++++++- include/trace/sched_event_types.h | 337 ------------------------------------ include/trace/skb.h | 36 +++- include/trace/skb_event_types.h | 38 ---- include/trace/trace_event_types.h | 7 - 11 files changed, 654 insertions(+), 510 deletions(-) delete mode 100644 include/trace/irq_event_types.h delete mode 100644 include/trace/lockdep_event_types.h delete mode 100644 include/trace/sched_event_types.h delete mode 100644 include/trace/skb_event_types.h delete mode 100644 include/trace/trace_event_types.h (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index d35a7ee7611f..4353f3f7e624 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -31,6 +31,8 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ +#ifndef DECLARE_TRACE + #define TP_PROTO(args...) args #define TP_ARGS(args...) args @@ -114,6 +116,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) { } #endif /* CONFIG_TRACEPOINTS */ +#endif /* DECLARE_TRACE */ /* * Connect a probe to a tracepoint. @@ -154,10 +157,13 @@ static inline void tracepoint_synchronize_unregister(void) } #define PARAMS(args...) args + +#ifndef TRACE_FORMAT #define TRACE_FORMAT(name, proto, args, fmt) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#endif - +#ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: * @@ -262,5 +268,6 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#endif #endif diff --git a/include/trace/irq.h b/include/trace/irq.h index ff5d4495dc37..04ab4c652225 100644 --- a/include/trace/irq.h +++ b/include/trace/irq.h @@ -1,9 +1,54 @@ -#ifndef _TRACE_IRQ_H +#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IRQ_H -#include #include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + +/* + * Tracepoint for entry of interrupt handler: + */ +TRACE_FORMAT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), + TP_FMT("irq=%d handler=%s", irq, action->name) + ); + +/* + * Tracepoint for return of an interrupt handler: + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") +); + +TRACE_FORMAT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); -#include +TRACE_FORMAT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); #endif diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h deleted file mode 100644 index 85964ebd47ec..000000000000 --- a/include/trace/irq_event_types.h +++ /dev/null @@ -1,55 +0,0 @@ - -/* use instead */ -#ifndef TRACE_FORMAT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM irq - -/* - * Tracepoint for entry of interrupt handler: - */ -TRACE_FORMAT(irq_handler_entry, - TP_PROTO(int irq, struct irqaction *action), - TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); - -/* - * Tracepoint for return of an interrupt handler: - */ -TRACE_EVENT(irq_handler_exit, - - TP_PROTO(int irq, struct irqaction *action, int ret), - - TP_ARGS(irq, action, ret), - - TP_STRUCT__entry( - __field( int, irq ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ), - - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? "handled" : "unhandled") -); - -TRACE_FORMAT(softirq_entry, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -TRACE_FORMAT(softirq_exit, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -#undef TRACE_SYSTEM diff --git a/include/trace/kmem.h b/include/trace/kmem.h index 46efc2423f03..d7d12189e5c8 100644 --- a/include/trace/kmem.h +++ b/include/trace/kmem.h @@ -1,9 +1,192 @@ -#ifndef _TRACE_KMEM_H +#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KMEM_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem -#endif /* _TRACE_KMEM_H */ +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +#endif diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h index 5ca67df87f2a..8ee7900b38c4 100644 --- a/include/trace/lockdep.h +++ b/include/trace/lockdep.h @@ -1,9 +1,57 @@ -#ifndef _TRACE_LOCKDEP_H +#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_LOCKDEP_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lock + +#ifdef CONFIG_LOCKDEP + +TRACE_FORMAT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + TP_FMT("%s%s%s", trylock ? "try " : "", + read ? "read " : "", lock->name) + ); + +TRACE_FORMAT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), + TP_FMT("%s", lock->name) + ); + +#ifdef CONFIG_LOCK_STAT + +TRACE_FORMAT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) + ); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __field(const char *, name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __entry->name = lock->name; + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + __entry->wait_nsec_rem) +); #endif +#endif + +#endif /* _TRACE_LOCKDEP_H */ diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h deleted file mode 100644 index 863f1e4583a6..000000000000 --- a/include/trace/lockdep_event_types.h +++ /dev/null @@ -1,57 +0,0 @@ - -#ifndef TRACE_FORMAT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lock - -#ifdef CONFIG_LOCKDEP - -TRACE_FORMAT(lock_acquire, - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); - -TRACE_FORMAT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); - -#ifdef CONFIG_LOCK_STAT - -TRACE_FORMAT(lock_contended, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __field(const char *, name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __entry->name = lock->name; - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#undef TRACE_SYSTEM diff --git a/include/trace/sched.h b/include/trace/sched.h index 4e372a1a29bf..5b1cf4a28463 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -1,9 +1,336 @@ -#ifndef _TRACE_SCHED_H +#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCHED_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched -#endif +/* + * Tracepoint for calling kthread_stop, performed to end a kthread: + */ +TRACE_EVENT(sched_kthread_stop, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __entry->pid = t->pid; + ), + + TP_printk("task %s:%d", __entry->comm, __entry->pid) +); + +/* + * Tracepoint for the return value of the kthread stopping: + */ +TRACE_EVENT(sched_kthread_stop_ret, + + TP_PROTO(int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( int, ret ) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret %d", __entry->ret) +); + +/* + * Tracepoint for waiting on task to unschedule: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wait_task, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for waking up a task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for waking up a new task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup_new, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_switch, + + TP_PROTO(struct rq *rq, struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(rq, prev, next), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ), + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio) +); + +/* + * Tracepoint for a task being migrated: + */ +TRACE_EVENT(sched_migrate_task, + + TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + + TP_ARGS(p, orig_cpu, dest_cpu), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->orig_cpu = orig_cpu; + __entry->dest_cpu = dest_cpu; + ), + + TP_printk("task %s:%d [%d] from: %d to: %d", + __entry->comm, __entry->pid, __entry->prio, + __entry->orig_cpu, __entry->dest_cpu) +); + +/* + * Tracepoint for freeing a task: + */ +TRACE_EVENT(sched_process_free, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a task exiting: + */ +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a waiting task: + */ +TRACE_EVENT(sched_process_wait, + + TP_PROTO(struct pid *pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->pid = pid_nr(pid); + __entry->prio = current->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for do_fork: + */ +TRACE_EVENT(sched_process_fork, + + TP_PROTO(struct task_struct *parent, struct task_struct *child), + + TP_ARGS(parent, child), + + TP_STRUCT__entry( + __array( char, parent_comm, TASK_COMM_LEN ) + __field( pid_t, parent_pid ) + __array( char, child_comm, TASK_COMM_LEN ) + __field( pid_t, child_pid ) + ), + + TP_fast_assign( + memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); + __entry->parent_pid = parent->pid; + memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __entry->child_pid = child->pid; + ), + + TP_printk("parent %s:%d child %s:%d", + __entry->parent_comm, __entry->parent_pid, + __entry->child_comm, __entry->child_pid) +); + +/* + * Tracepoint for sending a signal: + */ +TRACE_EVENT(sched_signal_send, + + TP_PROTO(int sig, struct task_struct *p), + + TP_ARGS(sig, p), + + TP_STRUCT__entry( + __field( int, sig ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->sig = sig; + ), + + TP_printk("sig: %d task %s:%d", + __entry->sig, __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SCHED_H */ diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h deleted file mode 100644 index 63547dc1125f..000000000000 --- a/include/trace/sched_event_types.h +++ /dev/null @@ -1,337 +0,0 @@ - -/* use instead */ -#ifndef TRACE_EVENT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM sched - -/* - * Tracepoint for calling kthread_stop, performed to end a kthread: - */ -TRACE_EVENT(sched_kthread_stop, - - TP_PROTO(struct task_struct *t), - - TP_ARGS(t), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, t->comm, TASK_COMM_LEN); - __entry->pid = t->pid; - ), - - TP_printk("task %s:%d", __entry->comm, __entry->pid) -); - -/* - * Tracepoint for the return value of the kthread stopping: - */ -TRACE_EVENT(sched_kthread_stop_ret, - - TP_PROTO(int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret %d", __entry->ret) -); - -/* - * Tracepoint for waiting on task to unschedule: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wait_task, - - TP_PROTO(struct rq *rq, struct task_struct *p), - - TP_ARGS(rq, p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for waking up a task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for waking up a new task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for task switches, performed by the scheduler: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_switch, - - TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), - - TP_ARGS(rq, prev, next), - - TP_STRUCT__entry( - __array( char, prev_comm, TASK_COMM_LEN ) - __field( pid_t, prev_pid ) - __field( int, prev_prio ) - __array( char, next_comm, TASK_COMM_LEN ) - __field( pid_t, next_pid ) - __field( int, next_prio ) - ), - - TP_fast_assign( - memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); - __entry->prev_pid = prev->pid; - __entry->prev_prio = prev->prio; - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); - __entry->next_pid = next->pid; - __entry->next_prio = next->prio; - ), - - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", - __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->next_comm, __entry->next_pid, __entry->next_prio) -); - -/* - * Tracepoint for a task being migrated: - */ -TRACE_EVENT(sched_migrate_task, - - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - - TP_ARGS(p, orig_cpu, dest_cpu), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, orig_cpu ) - __field( int, dest_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; - __entry->dest_cpu = dest_cpu; - ), - - TP_printk("task %s:%d [%d] from: %d to: %d", - __entry->comm, __entry->pid, __entry->prio, - __entry->orig_cpu, __entry->dest_cpu) -); - -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a task exiting: - */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a waiting task: - */ -TRACE_EVENT(sched_process_wait, - - TP_PROTO(struct pid *pid), - - TP_ARGS(pid), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - __entry->pid = pid_nr(pid); - __entry->prio = current->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for do_fork: - */ -TRACE_EVENT(sched_process_fork, - - TP_PROTO(struct task_struct *parent, struct task_struct *child), - - TP_ARGS(parent, child), - - TP_STRUCT__entry( - __array( char, parent_comm, TASK_COMM_LEN ) - __field( pid_t, parent_pid ) - __array( char, child_comm, TASK_COMM_LEN ) - __field( pid_t, child_pid ) - ), - - TP_fast_assign( - memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); - __entry->parent_pid = parent->pid; - memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); - __entry->child_pid = child->pid; - ), - - TP_printk("parent %s:%d child %s:%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid) -); - -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid) -); - -#undef TRACE_SYSTEM diff --git a/include/trace/skb.h b/include/trace/skb.h index d2de7174a6e8..e6fd281f7f81 100644 --- a/include/trace/skb.h +++ b/include/trace/skb.h @@ -1,9 +1,37 @@ -#ifndef _TRACE_SKB_H_ -#define _TRACE_SKB_H_ +#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SKB_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb -#endif +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#endif /* _TRACE_SKB_H */ diff --git a/include/trace/skb_event_types.h b/include/trace/skb_event_types.h deleted file mode 100644 index 4a1c504c0e16..000000000000 --- a/include/trace/skb_event_types.h +++ /dev/null @@ -1,38 +0,0 @@ - -/* use instead */ -#ifndef TRACE_EVENT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM skb - -/* - * Tracepoint for free an sk_buff: - */ -TRACE_EVENT(kfree_skb, - - TP_PROTO(struct sk_buff *skb, void *location), - - TP_ARGS(skb, location), - - TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( unsigned short, protocol ) - __field( void *, location ) - ), - - TP_fast_assign( - __entry->skbaddr = skb; - if (skb) { - __entry->protocol = ntohs(skb->protocol); - } - __entry->location = location; - ), - - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) -); - -#undef TRACE_SYSTEM diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h deleted file mode 100644 index 552a50e169a6..000000000000 --- a/include/trace/trace_event_types.h +++ /dev/null @@ -1,7 +0,0 @@ -/* trace/_event_types.h here */ - -#include -#include -#include -#include -#include -- cgit v1.2.3 From 78ddb08feb7d4fbe3c0a9931804c51ee58be4023 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 14 Apr 2009 16:53:05 +0200 Subject: wait: don't use __wake_up_common() '777c6c5 wait: prevent exclusive waiter starvation' made __wake_up_common() global to be used from abort_exclusive_wait(). It was needed to do a wake-up with the waitqueue lock held while passing down a key to the wake-up function. Since '4ede816 epoll keyed wakeups: add __wake_up_locked_key() and __wake_up_sync_key()' there is an appropriate wrapper for this case: __wake_up_locked_key(). Use it here and make __wake_up_common() private to the scheduler again. Signed-off-by: Johannes Weiner Cc: Andrew Morton Cc: Peter Zijlstra LKML-Reference: <1239720785-19661-1-git-send-email-hannes@cmpxchg.org> Signed-off-by: Ingo Molnar --- include/linux/wait.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 5d631c17eaee..67d4e89b62d6 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -132,8 +132,6 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, list_del(&old->task_list); } -void __wake_up_common(wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, int sync, void *key); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, -- cgit v1.2.3 From 72c6a9870f901045f2464c3dc6ee8914bfdc07aa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Apr 2009 17:33:57 +0200 Subject: rculist.h: introduce list_entry_rcu() and list_first_entry_rcu() I've run into the situation where I need to use list_first_entry with rcu-guarded list. This patch introduces this. Also simplify list_for_each_entry_rcu() to use new list_entry_rcu() instead of list_entry(). Signed-off-by: Jiri Pirko Reviewed-by: Paul E. McKenney Cc: dipankar@in.ibm.com LKML-Reference: <20090414153356.GC3999@psychotron.englab.brq.redhat.com> Signed-off-by: Ingo Molnar --- include/linux/rculist.h | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e649bd3f2c97..5710f43bbc9e 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -198,6 +198,32 @@ static inline void list_splice_init_rcu(struct list_head *list, at->prev = last; } +/** + * list_entry_rcu - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_entry_rcu(ptr, type, member) \ + container_of(rcu_dereference(ptr), type, member) + +/** + * list_first_entry_rcu - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * Note, that list is expected to be not empty. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_first_entry_rcu(ptr, type, member) \ + list_entry_rcu((ptr)->next, type, member) + #define __list_for_each_rcu(pos, head) \ for (pos = rcu_dereference((head)->next); \ pos != (head); \ @@ -214,9 +240,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ + for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \ prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) + pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** -- cgit v1.2.3 From a8d154b009168337494fbf345671bab74d3e4b8b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 09:36:00 -0400 Subject: tracing: create automated trace defines This patch lowers the number of places a developer must modify to add new tracepoints. The current method to add a new tracepoint into an existing system is to write the trace point macro in the trace header with one of the macros TRACE_EVENT, TRACE_FORMAT or DECLARE_TRACE, then they must add the same named item into the C file with the macro DEFINE_TRACE(name) and then add the trace point. This change cuts out the needing to add the DEFINE_TRACE(name). Every file that uses the tracepoint must still include the trace/.h file, but the one C file must also add a define before the including of that file. #define CREATE_TRACE_POINTS #include This will cause the trace/mytrace.h file to also produce the C code necessary to implement the trace point. Note, if more than one trace/.h is used to create the C code it is best to list them all together. #define CREATE_TRACE_POINTS #include #include #include Thanks to Mathieu Desnoyers and Christoph Hellwig for coming up with the cleaner solution of the define above the includes over my first design to have the C code include a "special" header. This patch converts sched, irq and lockdep and skb to use this new method. Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neil Horman Cc: Zhao Lei Cc: Eduard - Gabriel Munteanu Cc: Pekka Enberg Signed-off-by: Steven Rostedt --- include/trace/define_trace.h | 75 ++++++++++++++++++++++++++++++++++++++++++++ include/trace/irq.h | 5 ++- include/trace/kmem.h | 4 ++- include/trace/lockdep.h | 3 ++ include/trace/sched.h | 3 ++ include/trace/skb.h | 3 ++ 6 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 include/trace/define_trace.h (limited to 'include') diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h new file mode 100644 index 000000000000..de9dc7d8508b --- /dev/null +++ b/include/trace/define_trace.h @@ -0,0 +1,75 @@ +/* + * Trace files that want to automate creationg of all tracepoints defined + * in their file should include this file. The following are macros that the + * trace file may define: + * + * TRACE_SYSTEM defines the system the tracepoint is for + * + * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h + * This macro may be defined to tell define_trace.h what file to include. + * Note, leave off the ".h". + * + * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace + * then this macro can define the path to use. Note, the path is relative to + * define_trace.h, not the file including it. Full path names for out of tree + * modules must be used. + */ + +#ifdef CREATE_TRACE_POINTS + +/* Prevent recursion */ +#undef CREATE_TRACE_POINTS + +#include + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + DEFINE_TRACE(name) + +#undef TRACE_FORMAT +#define TRACE_FORMAT(name, proto, args, print) \ + DEFINE_TRACE(name) + +#undef DECLARE_TRACE +#define DECLARE_TRACE(name, proto, args) \ + DEFINE_TRACE(name) + +#undef TRACE_INCLUDE +#undef __TRACE_INCLUDE + +#ifndef TRACE_INCLUDE_FILE +# define TRACE_INCLUDE_FILE TRACE_SYSTEM +# define UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifndef TRACE_INCLUDE_PATH +# define __TRACE_INCLUDE(system) +# define UNDEF_TRACE_INCLUDE_FILE +#else +# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) +#endif + +# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system) + +/* Let the trace headers be reread */ +#define TRACE_HEADER_MULTI_READ + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef TRACE_HEADER_MULTI_READ + +/* Only undef what we defined in this file */ +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +/* We may be processing more files */ +#define CREATE_TRACE_POINTS + +#endif /* CREATE_TRACE_POINTS */ diff --git a/include/trace/irq.h b/include/trace/irq.h index 04ab4c652225..75e3468e4493 100644 --- a/include/trace/irq.h +++ b/include/trace/irq.h @@ -51,4 +51,7 @@ TRACE_FORMAT(softirq_exit, TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) ); -#endif +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/kmem.h b/include/trace/kmem.h index d7d12189e5c8..c22c42f980b5 100644 --- a/include/trace/kmem.h +++ b/include/trace/kmem.h @@ -188,5 +188,7 @@ TRACE_EVENT(kmem_cache_free, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); +#endif /* _TRACE_KMEM_H */ -#endif +/* This part must be outside protection */ +#include diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h index 8ee7900b38c4..4d301e758de3 100644 --- a/include/trace/lockdep.h +++ b/include/trace/lockdep.h @@ -55,3 +55,6 @@ TRACE_EVENT(lock_acquired, #endif #endif /* _TRACE_LOCKDEP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/sched.h b/include/trace/sched.h index 5b1cf4a28463..ffa1cab586b9 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -334,3 +334,6 @@ TRACE_EVENT(sched_signal_send, ); #endif /* _TRACE_SCHED_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/skb.h b/include/trace/skb.h index e6fd281f7f81..1e8fabb57c06 100644 --- a/include/trace/skb.h +++ b/include/trace/skb.h @@ -35,3 +35,6 @@ TRACE_EVENT(kfree_skb, ); #endif /* _TRACE_SKB_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 9504504cbab29ecb694186b1c5b15d3579c43c51 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 11 Apr 2009 12:59:57 -0400 Subject: tracing: make trace_seq operations available for core kernel In the process to make TRACE_EVENT macro work for modules, the trace_seq operations must be available for core kernel code. These operations are quite useful and can be used for other implementations. The main idea is that we create a trace_seq handle that acts very much like the seq_file handle. struct trace_seq *s = kmalloc(sizeof(*s, GFP_KERNEL); trace_seq_init(s); trace_seq_printf(s, "some data %d\n", variable); printk("%s", s->buffer); The main use is to allow a top level function call several other functions that may store printf like data into the buffer. Then at the end, the top level function can process all the data with any method it would like to. It could be passed to userspace, output via printk or even use seq_file: trace_seq_to_user(s, ubuf, cnt); seq_puts(m, s->buffer); Signed-off-by: Steven Rostedt --- include/linux/trace_seq.h | 89 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 include/linux/trace_seq.h (limited to 'include') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h new file mode 100644 index 000000000000..28051da876dd --- /dev/null +++ b/include/linux/trace_seq.h @@ -0,0 +1,89 @@ +#ifndef _LINUX_TRACE_SEQ_H +#define _LINUX_TRACE_SEQ_H + +/* + * Trace sequences are used to allow a function to call several other functions + * to create a string of data to use (up to a max of PAGE_SIZE. + */ + +struct trace_seq { + unsigned char buffer[PAGE_SIZE]; + unsigned int len; + unsigned int readpos; +}; + +static inline void +trace_seq_init(struct trace_seq *s) +{ + s->len = 0; + s->readpos = 0; +} + +/* + * Currently only defined when tracing is enabled. + */ +#ifdef CONFIG_TRACING +extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); +extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); +extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt); +extern int trace_seq_puts(struct trace_seq *s, const char *str); +extern int trace_seq_putc(struct trace_seq *s, unsigned char c); +extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); +extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + size_t len); +extern void *trace_seq_reserve(struct trace_seq *s, size_t len); +extern int trace_seq_path(struct trace_seq *s, struct path *path); + +#else /* CONFIG_TRACING */ +static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))) +{ + return 0; +} +static inline int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +{ + return 0; +} + +static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ +} +static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt) +{ + return 0; +} +static inline int trace_seq_puts(struct trace_seq *s, const char *str) +{ + return 0; +} +static inline int trace_seq_putc(struct trace_seq *s, unsigned char c); +{ + return 0; +} +static inline int +trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) +{ + return 0; +} +static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + size_t len) +{ + return 0; +} +static inline void *trace_seq_reserve(struct trace_seq *s, size_t len) +{ + return NULL; +} +static inline int trace_seq_path(struct trace_seq *s, struct path *path) +{ + return 0; +} +#endif /* CONFIG_TRACING */ + +#endif /* _LINUX_TRACE_SEQ_H */ -- cgit v1.2.3 From 97f2025153499faa17267a0d4e18c7afaf73f39d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 13 Apr 2009 11:20:49 -0400 Subject: tracing/events: move declarations from trace directory to core include In preparation to allowing trace events to happen in modules, we need to move some of the local declarations in the kernel/trace directory into include/linux. This patch simply moves the declarations and performs no context changes. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 146 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 include/linux/ftrace_event.h (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h new file mode 100644 index 000000000000..496b76d9f9d8 --- /dev/null +++ b/include/linux/ftrace_event.h @@ -0,0 +1,146 @@ +#ifndef _LINUX_FTRACE_EVENT_H +#define _LINUX_FTRACE_EVENT_H + +#include +#include + + +struct trace_array; +struct tracer; + +/* + * The trace entry - the most basic unit of tracing. This is what + * is printed in the end as a single line in the trace output, such as: + * + * bash-15816 [01] 235.197585: idle_cpu <- irq_enter + */ +struct trace_entry { + unsigned char type; + unsigned char flags; + unsigned char preempt_count; + int pid; + int tgid; +}; + +/* + * Trace iterator - used by printout routines who present trace + * results to users and which routines might sleep, etc: + */ +struct trace_iterator { + struct trace_array *tr; + struct tracer *trace; + void *private; + int cpu_file; + struct mutex mutex; + struct ring_buffer_iter *buffer_iter[NR_CPUS]; + + /* The below is zeroed out in pipe_read */ + struct trace_seq seq; + struct trace_entry *ent; + int cpu; + u64 ts; + + unsigned long iter_flags; + loff_t pos; + long idx; + + cpumask_var_t started; +}; + + +typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, + int flags); +struct trace_event { + struct hlist_node node; + int type; + trace_print_func trace; + trace_print_func raw; + trace_print_func hex; + trace_print_func binary; +}; + +extern int register_ftrace_event(struct trace_event *event); +extern int unregister_ftrace_event(struct trace_event *event); + +/* Return values for print_line callback */ +enum print_line_t { + TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ + TRACE_TYPE_HANDLED = 1, + TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ + TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ +}; + + +struct ring_buffer_event * +trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, + unsigned long flags, int pc); +void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_current_buffer_discard_commit(struct ring_buffer_event *event); + +void tracing_record_cmdline(struct task_struct *tsk); + +struct ftrace_event_call { + char *name; + char *system; + struct dentry *dir; + int enabled; + int (*regfunc)(void); + void (*unregfunc)(void); + int id; + int (*raw_init)(void); + int (*show_format)(struct trace_seq *s); + int (*define_fields)(void); + struct list_head fields; + int n_preds; + struct filter_pred **preds; + +#ifdef CONFIG_EVENT_PROFILE + atomic_t profile_count; + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); +#endif +}; + +#define MAX_FILTER_PRED 8 +#define MAX_FILTER_STR_VAL 128 + +extern int init_preds(struct ftrace_event_call *call); +extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern int filter_current_check_discard(struct ftrace_event_call *call, + void *rec, + struct ring_buffer_event *event); + +extern int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size); + + +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement optimizing out. + */ +#define event_trace_printk(ip, fmt, args...) \ +do { \ + __trace_printk_check_format(fmt, ##args); \ + tracing_record_cmdline(current); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(ip, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(ip, fmt, ##args); \ +} while (0) + +#define __common_field(type, item) \ + ret = trace_define_field(event_call, #type, "common_" #item, \ + offsetof(typeof(field.ent), item), \ + sizeof(field.ent.item)); \ + if (ret) \ + return ret; + +#endif /* _LINUX_FTRACE_EVENT_H */ -- cgit v1.2.3 From f42c85e74faa422cf0bc747ed808681145448f88 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 13 Apr 2009 12:25:37 -0400 Subject: tracing/events: move the ftrace event tracing code to core This patch moves the ftrace creation into include/trace/ftrace.h and simplifies the work of developers in adding new tracepoints. Just the act of creating the trace points in include/trace and including define_trace.h will create the events in the debugfs/tracing/events directory. This patch removes the need of include/trace/trace_events.h Signed-off-by: Steven Rostedt --- include/trace/define_trace.h | 4 + include/trace/ftrace.h | 492 +++++++++++++++++++++++++++++++++++++++++++ include/trace/trace_events.h | 7 - 3 files changed, 496 insertions(+), 7 deletions(-) create mode 100644 include/trace/ftrace.h delete mode 100644 include/trace/trace_events.h (limited to 'include') diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index de9dc7d8508b..980eb66a6e38 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -56,6 +56,10 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#ifdef CONFIG_EVENT_TRACER +#include +#endif + #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h new file mode 100644 index 000000000000..955b967acd74 --- /dev/null +++ b/include/trace/ftrace.h @@ -0,0 +1,492 @@ +/* + * Stage 1 of the trace events. + * + * Override the macros in to include the following: + * + * struct ftrace_raw_ { + * struct trace_entry ent; + * ; + * []; + * [...] + * }; + * + * The is created by the __field(type, item) macro or + * the __array(type2, item2, len) macro. + * We simply do "type item;", and that will create the fields + * in the structure. + */ + +#include + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) + +#undef __array +#define __array(type, item, len) type item[len]; + +#undef __field +#define __field(type, item) type item; + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + }; \ + static struct ftrace_event_call event_##name + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 2 of the trace events. + * + * Override the macros in to include the following: + * + * enum print_line_t + * ftrace_raw_output_(struct trace_iterator *iter, int flags) + * { + * struct trace_seq *s = &iter->seq; + * struct ftrace_raw_ *field; <-- defined in stage 1 + * struct trace_entry *entry; + * int ret; + * + * entry = iter->ent; + * + * if (entry->type != event_.id) { + * WARN_ON_ONCE(1); + * return TRACE_TYPE_UNHANDLED; + * } + * + * field = (typeof(field))entry; + * + * ret = trace_seq_printf(s, "\n"); + * if (!ret) + * return TRACE_TYPE_PARTIAL_LINE; + * + * return TRACE_TYPE_HANDLED; + * } + * + * This is the method used to print the raw event to the trace + * output format. Note, this is not needed if the data is read + * in binary. + */ + +#undef __entry +#define __entry field + +#undef TP_printk +#define TP_printk(fmt, args...) fmt "\n", args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct ftrace_raw_##call *field; \ + struct trace_entry *entry; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + ret = trace_seq_printf(s, #call ": " print); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " offset:%u; size:%u;\n", + * offsetof(struct ftrace_raw_##call, item), + * sizeof(field.type)); + * + * } + */ + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __entry +#define __entry REC + +#undef TP_printk +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef __field +#define __field(type, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef __array +#define __array(type, item, len) \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +int \ +ftrace_define_fields_##call(void) \ +{ \ + struct ftrace_raw_##call field; \ + struct ftrace_event_call *event_call = &event_##call; \ + int ret; \ + \ + __common_field(unsigned char, type); \ + __common_field(unsigned char, flags); \ + __common_field(unsigned char, preempt_count); \ + __common_field(int, pid); \ + __common_field(int, tgid); \ + \ + tstruct; \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 3 of the trace events. + * + * Override the macros in to include the following: + * + * static void ftrace_event_(proto) + * { + * event_trace_printk(_RET_IP_, ": " ); + * } + * + * static int ftrace_reg_event_(void) + * { + * int ret; + * + * ret = register_trace_(ftrace_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; + * } + * + * static void ftrace_unreg_event_(void) + * { + * unregister_trace_(ftrace_event_); + * } + * + * For those macros defined with TRACE_FORMAT: + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_ = { + * .name = "", + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, + * } + * + * + * For those macros defined with TRACE_EVENT: + * + * static struct ftrace_event_call event_; + * + * static void ftrace_raw_event_(proto) + * { + * struct ring_buffer_event *event; + * struct ftrace_raw_ *entry; <-- defined in stage 1 + * unsigned long irq_flags; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * event = trace_current_buffer_lock_reserve(event_.id, + * sizeof(struct ftrace_raw_), + * irq_flags, pc); + * if (!event) + * return; + * entry = ring_buffer_event_data(event); + * + * ; <-- Here we assign the entries by the __field and + * __array macros. + * + * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * } + * + * static int ftrace_raw_reg_event_(void) + * { + * int ret; + * + * ret = register_trace_(ftrace_raw_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; + * } + * + * static void ftrace_unreg_event_(void) + * { + * unregister_trace_(ftrace_raw_event_); + * } + * + * static struct trace_event ftrace_event_type_ = { + * .trace = ftrace_raw_output_, <-- stage 2 + * }; + * + * static int ftrace_raw_init_event_(void) + * { + * int id; + * + * id = register_ftrace_event(&ftrace_event_type_); + * if (!id) + * return -ENODEV; + * event_.id = id; + * return 0; + * } + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_ = { + * .name = "", + * .system = "", + * .raw_init = ftrace_raw_init_event_, + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, + * .show_format = ftrace_format_, + * } + * + */ + +#undef TP_FMT +#define TP_FMT(fmt, args...) fmt "\n", ##args + +#ifdef CONFIG_EVENT_PROFILE +#define _TRACE_PROFILE(call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + extern void perf_tpcounter_event(int); \ + perf_tpcounter_event(event_##call.id); \ +} \ + \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \ +{ \ + int ret = 0; \ + \ + if (!atomic_inc_return(&call->profile_count)) \ + ret = register_trace_##call(ftrace_profile_##call); \ + \ + return ret; \ +} \ + \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ +{ \ + if (atomic_add_negative(-1, &call->profile_count)) \ + unregister_trace_##call(ftrace_profile_##call); \ +} + +#define _TRACE_PROFILE_INIT(call) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = ftrace_profile_enable_##call, \ + .profile_disable = ftrace_profile_disable_##call, + +#else +#define _TRACE_PROFILE(call, proto, args) +#define _TRACE_PROFILE_INIT(call) +#endif + +#define _TRACE_FORMAT(call, proto, args, fmt) \ +static void ftrace_event_##call(proto) \ +{ \ + event_trace_printk(_RET_IP_, #call ": " fmt); \ +} \ + \ +static int ftrace_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_event_##call); \ + if (ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call "\n"); \ + return ret; \ +} \ + \ +static void ftrace_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_event_##call); \ +} \ + \ +static struct ftrace_event_call event_##call; \ + \ +static int ftrace_init_event_##call(void) \ +{ \ + int id; \ + \ + id = register_ftrace_event(NULL); \ + if (!id) \ + return -ENODEV; \ + event_##call.id = id; \ + return 0; \ +} + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) \ +_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .raw_init = ftrace_init_event_##call, \ + .regfunc = ftrace_reg_event_##call, \ + .unregfunc = ftrace_unreg_event_##call, \ + _TRACE_PROFILE_INIT(call) \ +} + +#undef __entry +#define __entry entry + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ + \ +static struct ftrace_event_call event_##call; \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + struct ftrace_event_call *call = &event_##call; \ + struct ring_buffer_event *event; \ + struct ftrace_raw_##call *entry; \ + unsigned long irq_flags; \ + int pc; \ + \ + local_save_flags(irq_flags); \ + pc = preempt_count(); \ + \ + event = trace_current_buffer_lock_reserve(event_##call.id, \ + sizeof(struct ftrace_raw_##call), \ + irq_flags, pc); \ + if (!event) \ + return; \ + entry = ring_buffer_event_data(event); \ + \ + assign; \ + \ + if (!filter_current_check_discard(call, entry, event)) \ + trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ +} \ + \ +static int ftrace_raw_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_raw_event_##call); \ + if (ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call "\n"); \ + return ret; \ +} \ + \ +static void ftrace_raw_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_raw_event_##call); \ +} \ + \ +static struct trace_event ftrace_event_type_##call = { \ + .trace = ftrace_raw_output_##call, \ +}; \ + \ +static int ftrace_raw_init_event_##call(void) \ +{ \ + int id; \ + \ + id = register_ftrace_event(&ftrace_event_type_##call); \ + if (!id) \ + return -ENODEV; \ + event_##call.id = id; \ + INIT_LIST_HEAD(&event_##call.fields); \ + init_preds(&event_##call); \ + return 0; \ +} \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .raw_init = ftrace_raw_init_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##call, \ + _TRACE_PROFILE_INIT(call) \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef _TRACE_PROFILE +#undef _TRACE_PROFILE_INIT + diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h deleted file mode 100644 index 13d6b85668cf..000000000000 --- a/include/trace/trace_events.h +++ /dev/null @@ -1,7 +0,0 @@ -/* trace/.h here */ - -#include -#include -#include -#include -#include -- cgit v1.2.3 From a59fd6027218bd7c994e39d14afe0242f895144f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 13:52:20 -0400 Subject: tracing/events: convert event call sites to use a link list Impact: makes it possible to define events in modules The events are created by reading down the section that they are linked in by the macros. But this is not scalable to modules. This patch converts the manipulations to use a global link list, and on boot up it adds the items in the section to the list. This change will allow modules to add their tracing events to the list as well. Note, this change alone does not permit modules to use the TRACE_EVENT macros, but the change is needed for them to eventually do so. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 496b76d9f9d8..17810853b4f8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -83,6 +83,7 @@ void trace_current_buffer_discard_commit(struct ring_buffer_event *event); void tracing_record_cmdline(struct task_struct *tsk); struct ftrace_event_call { + struct list_head list; char *name; char *system; struct dentry *dir; -- cgit v1.2.3 From 6d723736e472f7a0cd5b62c84152fceead241328 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 14:53:50 -0400 Subject: tracing/events: add support for modules to TRACE_EVENT Impact: allow modules to add TRACE_EVENTS on load This patch adds the final hooks to allow modules to use the TRACE_EVENT macro. A notifier and a data structure are used to link the TRACE_EVENTs defined in the module to connect them with the ftrace event tracing system. It also adds the necessary automated clean ups to the trace events when a module is removed. Cc: Rusty Russell Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 3 +++ include/linux/module.h | 4 ++++ include/linux/trace_seq.h | 2 ++ include/trace/ftrace.h | 1 + 4 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 17810853b4f8..75f3ac01a87c 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -7,6 +7,7 @@ struct trace_array; struct tracer; +struct dentry; /* * The trace entry - the most basic unit of tracing. This is what @@ -87,6 +88,7 @@ struct ftrace_event_call { char *name; char *system; struct dentry *dir; + struct trace_event *event; int enabled; int (*regfunc)(void); void (*unregfunc)(void); @@ -97,6 +99,7 @@ struct ftrace_event_call { struct list_head fields; int n_preds; struct filter_pred **preds; + void *mod; #ifdef CONFIG_EVENT_PROFILE atomic_t profile_count; diff --git a/include/linux/module.h b/include/linux/module.h index 627ac082e2a6..6155fa44168b 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -337,6 +337,10 @@ struct module const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; #endif +#ifdef CONFIG_EVENT_TRACING + struct ftrace_event_call *trace_events; + unsigned int num_trace_events; +#endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 28051da876dd..15ca2c71af13 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -1,6 +1,8 @@ #ifndef _LINUX_TRACE_SEQ_H #define _LINUX_TRACE_SEQ_H +#include + /* * Trace sequences are used to allow a function to call several other functions * to create a string of data to use (up to a max of PAGE_SIZE. diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 955b967acd74..60c5323bee64 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -477,6 +477,7 @@ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ + .event = &ftrace_event_type_##call, \ .raw_init = ftrace_raw_init_event_##call, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ -- cgit v1.2.3 From ecda8ae02a08ef065ff387f5cb2a2d4999da2408 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Apr 2009 18:49:38 -0400 Subject: tracing/events: fix lockdep system name Impact: fix compile error of lockdep event tracer Ingo Molnar pointed out that the system name for the lockdep tracer was "lock" which is used to include the event trace file name. It should be "lockdep" Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt --- include/trace/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h index 4d301e758de3..45e326b5c7f3 100644 --- a/include/trace/lockdep.h +++ b/include/trace/lockdep.h @@ -5,7 +5,7 @@ #include #undef TRACE_SYSTEM -#define TRACE_SYSTEM lock +#define TRACE_SYSTEM lockdep #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From ad8d75fff811a6a230f7f43b05a6483099349533 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Apr 2009 19:39:12 -0400 Subject: tracing/events: move trace point headers into include/trace/events Impact: clean up Create a sub directory in include/trace called events to keep the trace point headers in their own separate directory. Only headers that declare trace points should be defined in this directory. Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neil Horman Cc: Zhao Lei Cc: Eduard - Gabriel Munteanu Cc: Pekka Enberg Signed-off-by: Steven Rostedt --- include/linux/kmemtrace.h | 2 +- include/trace/define_trace.h | 2 +- include/trace/events/irq.h | 57 +++++++ include/trace/events/kmem.h | 194 +++++++++++++++++++++++ include/trace/events/lockdep.h | 60 ++++++++ include/trace/events/sched.h | 339 +++++++++++++++++++++++++++++++++++++++++ include/trace/events/skb.h | 40 +++++ include/trace/irq.h | 57 ------- include/trace/kmem.h | 194 ----------------------- include/trace/lockdep.h | 60 -------- include/trace/sched.h | 339 ----------------------------------------- include/trace/skb.h | 40 ----- 12 files changed, 692 insertions(+), 692 deletions(-) create mode 100644 include/trace/events/irq.h create mode 100644 include/trace/events/kmem.h create mode 100644 include/trace/events/lockdep.h create mode 100644 include/trace/events/sched.h create mode 100644 include/trace/events/skb.h delete mode 100644 include/trace/irq.h delete mode 100644 include/trace/kmem.h delete mode 100644 include/trace/lockdep.h delete mode 100644 include/trace/sched.h delete mode 100644 include/trace/skb.h (limited to 'include') diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h index 15c45a27a925..b616d3930c3b 100644 --- a/include/linux/kmemtrace.h +++ b/include/linux/kmemtrace.h @@ -9,7 +9,7 @@ #ifdef __KERNEL__ -#include +#include #ifdef CONFIG_KMEMTRACE extern void kmemtrace_init(void); diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 980eb66a6e38..18869417109c 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -43,7 +43,7 @@ #endif #ifndef TRACE_INCLUDE_PATH -# define __TRACE_INCLUDE(system) +# define __TRACE_INCLUDE(system) # define UNDEF_TRACE_INCLUDE_FILE #else # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h new file mode 100644 index 000000000000..75e3468e4493 --- /dev/null +++ b/include/trace/events/irq.h @@ -0,0 +1,57 @@ +#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IRQ_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + +/* + * Tracepoint for entry of interrupt handler: + */ +TRACE_FORMAT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), + TP_FMT("irq=%d handler=%s", irq, action->name) + ); + +/* + * Tracepoint for return of an interrupt handler: + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") +); + +TRACE_FORMAT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +TRACE_FORMAT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h new file mode 100644 index 000000000000..c22c42f980b5 --- /dev/null +++ b/include/trace/events/kmem.h @@ -0,0 +1,194 @@ +#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KMEM_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem + +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); +#endif /* _TRACE_KMEM_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h new file mode 100644 index 000000000000..45e326b5c7f3 --- /dev/null +++ b/include/trace/events/lockdep.h @@ -0,0 +1,60 @@ +#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCKDEP_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lockdep + +#ifdef CONFIG_LOCKDEP + +TRACE_FORMAT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + TP_FMT("%s%s%s", trylock ? "try " : "", + read ? "read " : "", lock->name) + ); + +TRACE_FORMAT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), + TP_FMT("%s", lock->name) + ); + +#ifdef CONFIG_LOCK_STAT + +TRACE_FORMAT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) + ); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __field(const char *, name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __entry->name = lock->name; + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCKDEP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h new file mode 100644 index 000000000000..ffa1cab586b9 --- /dev/null +++ b/include/trace/events/sched.h @@ -0,0 +1,339 @@ +#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SCHED_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched + +/* + * Tracepoint for calling kthread_stop, performed to end a kthread: + */ +TRACE_EVENT(sched_kthread_stop, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __entry->pid = t->pid; + ), + + TP_printk("task %s:%d", __entry->comm, __entry->pid) +); + +/* + * Tracepoint for the return value of the kthread stopping: + */ +TRACE_EVENT(sched_kthread_stop_ret, + + TP_PROTO(int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( int, ret ) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret %d", __entry->ret) +); + +/* + * Tracepoint for waiting on task to unschedule: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wait_task, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for waking up a task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for waking up a new task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup_new, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_switch, + + TP_PROTO(struct rq *rq, struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(rq, prev, next), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ), + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio) +); + +/* + * Tracepoint for a task being migrated: + */ +TRACE_EVENT(sched_migrate_task, + + TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + + TP_ARGS(p, orig_cpu, dest_cpu), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->orig_cpu = orig_cpu; + __entry->dest_cpu = dest_cpu; + ), + + TP_printk("task %s:%d [%d] from: %d to: %d", + __entry->comm, __entry->pid, __entry->prio, + __entry->orig_cpu, __entry->dest_cpu) +); + +/* + * Tracepoint for freeing a task: + */ +TRACE_EVENT(sched_process_free, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a task exiting: + */ +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a waiting task: + */ +TRACE_EVENT(sched_process_wait, + + TP_PROTO(struct pid *pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->pid = pid_nr(pid); + __entry->prio = current->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for do_fork: + */ +TRACE_EVENT(sched_process_fork, + + TP_PROTO(struct task_struct *parent, struct task_struct *child), + + TP_ARGS(parent, child), + + TP_STRUCT__entry( + __array( char, parent_comm, TASK_COMM_LEN ) + __field( pid_t, parent_pid ) + __array( char, child_comm, TASK_COMM_LEN ) + __field( pid_t, child_pid ) + ), + + TP_fast_assign( + memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); + __entry->parent_pid = parent->pid; + memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __entry->child_pid = child->pid; + ), + + TP_printk("parent %s:%d child %s:%d", + __entry->parent_comm, __entry->parent_pid, + __entry->child_comm, __entry->child_pid) +); + +/* + * Tracepoint for sending a signal: + */ +TRACE_EVENT(sched_signal_send, + + TP_PROTO(int sig, struct task_struct *p), + + TP_ARGS(sig, p), + + TP_STRUCT__entry( + __field( int, sig ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->sig = sig; + ), + + TP_printk("sig: %d task %s:%d", + __entry->sig, __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SCHED_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h new file mode 100644 index 000000000000..1e8fabb57c06 --- /dev/null +++ b/include/trace/events/skb.h @@ -0,0 +1,40 @@ +#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SKB_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb + +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#endif /* _TRACE_SKB_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/irq.h b/include/trace/irq.h deleted file mode 100644 index 75e3468e4493..000000000000 --- a/include/trace/irq.h +++ /dev/null @@ -1,57 +0,0 @@ -#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_IRQ_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM irq - -/* - * Tracepoint for entry of interrupt handler: - */ -TRACE_FORMAT(irq_handler_entry, - TP_PROTO(int irq, struct irqaction *action), - TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); - -/* - * Tracepoint for return of an interrupt handler: - */ -TRACE_EVENT(irq_handler_exit, - - TP_PROTO(int irq, struct irqaction *action, int ret), - - TP_ARGS(irq, action, ret), - - TP_STRUCT__entry( - __field( int, irq ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ), - - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? "handled" : "unhandled") -); - -TRACE_FORMAT(softirq_entry, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -TRACE_FORMAT(softirq_exit, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -#endif /* _TRACE_IRQ_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/kmem.h b/include/trace/kmem.h deleted file mode 100644 index c22c42f980b5..000000000000 --- a/include/trace/kmem.h +++ /dev/null @@ -1,194 +0,0 @@ -#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_KMEM_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kmem - -TRACE_EVENT(kmalloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmem_cache_alloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmalloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kmem_cache_alloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kfree, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); - -TRACE_EVENT(kmem_cache_free, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); -#endif /* _TRACE_KMEM_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h deleted file mode 100644 index 45e326b5c7f3..000000000000 --- a/include/trace/lockdep.h +++ /dev/null @@ -1,60 +0,0 @@ -#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_LOCKDEP_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep - -#ifdef CONFIG_LOCKDEP - -TRACE_FORMAT(lock_acquire, - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); - -TRACE_FORMAT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); - -#ifdef CONFIG_LOCK_STAT - -TRACE_FORMAT(lock_contended, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __field(const char *, name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __entry->name = lock->name; - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#endif /* _TRACE_LOCKDEP_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/sched.h b/include/trace/sched.h deleted file mode 100644 index ffa1cab586b9..000000000000 --- a/include/trace/sched.h +++ /dev/null @@ -1,339 +0,0 @@ -#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SCHED_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM sched - -/* - * Tracepoint for calling kthread_stop, performed to end a kthread: - */ -TRACE_EVENT(sched_kthread_stop, - - TP_PROTO(struct task_struct *t), - - TP_ARGS(t), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, t->comm, TASK_COMM_LEN); - __entry->pid = t->pid; - ), - - TP_printk("task %s:%d", __entry->comm, __entry->pid) -); - -/* - * Tracepoint for the return value of the kthread stopping: - */ -TRACE_EVENT(sched_kthread_stop_ret, - - TP_PROTO(int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret %d", __entry->ret) -); - -/* - * Tracepoint for waiting on task to unschedule: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wait_task, - - TP_PROTO(struct rq *rq, struct task_struct *p), - - TP_ARGS(rq, p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for waking up a task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for waking up a new task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for task switches, performed by the scheduler: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_switch, - - TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), - - TP_ARGS(rq, prev, next), - - TP_STRUCT__entry( - __array( char, prev_comm, TASK_COMM_LEN ) - __field( pid_t, prev_pid ) - __field( int, prev_prio ) - __array( char, next_comm, TASK_COMM_LEN ) - __field( pid_t, next_pid ) - __field( int, next_prio ) - ), - - TP_fast_assign( - memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); - __entry->prev_pid = prev->pid; - __entry->prev_prio = prev->prio; - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); - __entry->next_pid = next->pid; - __entry->next_prio = next->prio; - ), - - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", - __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->next_comm, __entry->next_pid, __entry->next_prio) -); - -/* - * Tracepoint for a task being migrated: - */ -TRACE_EVENT(sched_migrate_task, - - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - - TP_ARGS(p, orig_cpu, dest_cpu), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, orig_cpu ) - __field( int, dest_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; - __entry->dest_cpu = dest_cpu; - ), - - TP_printk("task %s:%d [%d] from: %d to: %d", - __entry->comm, __entry->pid, __entry->prio, - __entry->orig_cpu, __entry->dest_cpu) -); - -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a task exiting: - */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a waiting task: - */ -TRACE_EVENT(sched_process_wait, - - TP_PROTO(struct pid *pid), - - TP_ARGS(pid), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - __entry->pid = pid_nr(pid); - __entry->prio = current->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for do_fork: - */ -TRACE_EVENT(sched_process_fork, - - TP_PROTO(struct task_struct *parent, struct task_struct *child), - - TP_ARGS(parent, child), - - TP_STRUCT__entry( - __array( char, parent_comm, TASK_COMM_LEN ) - __field( pid_t, parent_pid ) - __array( char, child_comm, TASK_COMM_LEN ) - __field( pid_t, child_pid ) - ), - - TP_fast_assign( - memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); - __entry->parent_pid = parent->pid; - memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); - __entry->child_pid = child->pid; - ), - - TP_printk("parent %s:%d child %s:%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid) -); - -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid) -); - -#endif /* _TRACE_SCHED_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/skb.h b/include/trace/skb.h deleted file mode 100644 index 1e8fabb57c06..000000000000 --- a/include/trace/skb.h +++ /dev/null @@ -1,40 +0,0 @@ -#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SKB_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM skb - -/* - * Tracepoint for free an sk_buff: - */ -TRACE_EVENT(kfree_skb, - - TP_PROTO(struct sk_buff *skb, void *location), - - TP_ARGS(skb, location), - - TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( unsigned short, protocol ) - __field( void *, location ) - ), - - TP_fast_assign( - __entry->skbaddr = skb; - if (skb) { - __entry->protocol = ntohs(skb->protocol); - } - __entry->location = location; - ), - - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) -); - -#endif /* _TRACE_SKB_H */ - -/* This part must be outside protection */ -#include -- cgit v1.2.3 From 05725f7eb4b8acb147c5fc7b91397b1f6bcab00d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Apr 2009 20:17:16 +0200 Subject: rculist: use list_entry_rcu in places where it's appropriate Use previously introduced list_entry_rcu instead of an open-coded list_entry + rcu_dereference combination. Signed-off-by: Jiri Pirko Reviewed-by: Paul E. McKenney Cc: dipankar@in.ibm.com LKML-Reference: <20090414181715.GA3634@psychotron.englab.brq.redhat.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc8049c..886df41e7452 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -77,6 +77,7 @@ struct sched_param { #include #include #include +#include #include #include @@ -2010,7 +2011,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -#define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) @@ -2049,8 +2051,8 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2) static inline struct task_struct *next_thread(const struct task_struct *p) { - return list_entry(rcu_dereference(p->thread_group.next), - struct task_struct, thread_group); + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); } static inline int thread_group_empty(struct task_struct *p) -- cgit v1.2.3 From e6a1a89d572c31b62d6dcf11a371c7323852d9b2 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 15 Apr 2009 18:22:41 +0900 Subject: dma-debug: add dma_debug_resize_entries() to adjust the number of dma_debug_entries We use a static value for the number of dma_debug_entries. It can be overwritten by a kernel command line option. Some IOMMUs (e.g. GART) can't set an appropriate value by a kernel command line option because they can't know such value until they finish initializing up their hardware. This patch adds dma_debug_resize_entries() enables IOMMUs to adjust the number of dma_debug_entries anytime. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Cc: fujita.tomonori@lab.ntt.co.jp Cc: akpm@linux-foundation.org LKML-Reference: <20090415182234R.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- include/linux/dma-debug.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 28d53cb7b5a2..171ad8aedc83 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus); extern void dma_debug_init(u32 num_entries); +extern int dma_debug_resize_entries(u32 num_entries); + extern void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, @@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries) { } +static inline int dma_debug_resize_entries(u32 num_entries) +{ + return 0; +} + static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, -- cgit v1.2.3 From d0deef5b14af7d5bbd0003a0a2a1a32326e20a6d Mon Sep 17 00:00:00 2001 From: Shawn Du Date: Tue, 14 Apr 2009 13:58:56 +0800 Subject: blktrace: support per-partition tracing Though one can specify '-d /dev/sda1' when using blktrace, it still traces the whole sda. To support per-partition tracing, when we start tracing, we initialize bt->start_lba and bt->end_lba to the start and end sector of that partition. Note some actions are per device, thus we don't filter 0-sector events. The original patch and discussion can be found here: http://marc.info/?l=linux-btrace&m=122949374214540&w=2 Signed-off-by: Shawn Du Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Jens Axboe LKML-Reference: <49E42620.4050701@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/blktrace_api.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index d960889e92ef..267edc4017ee 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -165,8 +165,9 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern int do_blk_trace_setup(struct request_queue *q, - char *name, dev_t dev, struct blk_user_trace_setup *buts); +extern int do_blk_trace_setup(struct request_queue *q, char *name, + dev_t dev, struct block_device *bdev, + struct blk_user_trace_setup *buts); extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); /** @@ -193,6 +194,7 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); extern void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); @@ -200,15 +202,15 @@ extern int blk_trace_remove(struct request_queue *q); extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ -#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) -#define blk_trace_shutdown(q) do { } while (0) -#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) -#define blk_add_driver_data(q, rq, data, len) do {} while (0) -#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) -#define blk_trace_startstop(q, start) (-ENOTTY) -#define blk_trace_remove(q) (-ENOTTY) -#define blk_add_trace_msg(q, fmt, ...) do { } while (0) - +# define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) +# define blk_trace_shutdown(q) do { } while (0) +# define do_blk_trace_setup(q, name, dev, bdev, buts) (-ENOTTY) +# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) +# define blk_trace_startstop(q, start) (-ENOTTY) +# define blk_trace_remove(q) (-ENOTTY) +# define blk_add_trace_msg(q, fmt, ...) do { } while (0) #endif /* CONFIG_BLK_DEV_IO_TRACE */ + #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 1d54ad6da9192fed5dd3b60224d9f2dfea0dcd82 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 14 Apr 2009 14:00:05 +0800 Subject: blktrace: add trace/ to /sys/block/sda Impact: allow ftrace-plugin blktrace to trace device-mapper devices To trace a single partition: # echo 1 > /sys/block/sda/sda1/enable To trace the whole sda instead: # echo 1 > /sys/block/sda/enable Thus we also fix an issue reported by Ted, that ftrace-plugin blktrace can't be used to trace device-mapper devices. Now: # echo 1 > /sys/block/dm-0/trace/enable echo: write error: No such device or address # mount -t ext4 /dev/dm-0 /mnt # echo 1 > /sys/block/dm-0/trace/enable # echo blk > /debug/tracing/current_tracer Reported-by: Theodore Tso Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Shawn Du Cc: Jens Axboe LKML-Reference: <49E42665.6020506@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/blktrace_api.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 267edc4017ee..62763c952854 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -198,6 +198,7 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; @@ -210,6 +211,11 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) +static inline int blk_trace_init_sysfs(struct device *dev) +{ + return 0; +} + #endif /* CONFIG_BLK_DEV_IO_TRACE */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 93eb677d74a4f7d3edfb678c94f6c0544d9fbad2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 15 Apr 2009 13:24:06 -0400 Subject: ftrace: use module notifier for function tracer The hooks in the module code for the function tracer must be called before any of that module code runs. The function tracer hooks modify the module (replacing calls to mcount to nops). If the code is executed while the change occurs, then the CPU can take a GPF. To handle the above with a bit of paranoia, I originally implemented the hooks as calls directly from the module code. After examining the notifier calls, it looks as though the start up notify is called before any of the module's code is executed. This makes the use of the notify safe with ftrace. Only the startup notify is required to be "safe". The shutdown simply removes the entries from the ftrace function list, and does not modify any code. This change has another benefit. It removes a issue with a reverse dependency in the mutexes of ftrace_lock and module_mutex. [ Impact: fix lock dependency bug, cleanup ] Cc: Rusty Russell Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 ------- include/linux/module.h | 4 ++++ 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 53869bef6102..97c83e1bc589 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); -extern void ftrace_release(void *start, unsigned long size); - extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); #else @@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } -static inline void -ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) { } #endif /* diff --git a/include/linux/module.h b/include/linux/module.h index 6155fa44168b..a8f2c0aa4c32 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -341,6 +341,10 @@ struct module struct ftrace_event_call *trace_events; unsigned int num_trace_events; #endif +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + unsigned long *ftrace_callsites; + unsigned int num_ftrace_callsites; +#endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ -- cgit v1.2.3 From d1b182a8d49ed6416325b4e0a1cb0f17cd4e702a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 15 Apr 2009 16:53:47 -0400 Subject: tracing/events/ring-buffer: expose format of ring buffer headers to users Currently, every thing needed to read the binary output from the ring buffers is available, with the exception of the way the ring buffers handles itself internally. This patch creates two special files in the debugfs/tracing/events directory: # cat /debug/tracing/events/header_page field: u64 timestamp; offset:0; size:8; field: local_t commit; offset:8; size:8; field: char data; offset:16; size:4080; # cat /debug/tracing/events/header_event type : 2 bits len : 3 bits time_delta : 27 bits array : 32 bits padding : type == 0 time_extend : type == 1 data : type == 3 This is to allow a userspace app to see if the ring buffer format changes or not. [ Impact: allow userspace apps to know of ringbuffer format changes ] Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index f0aa486d131c..fac8f1ac6f49 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -166,6 +166,11 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, size_t len, int cpu, int full); +struct trace_seq; + +int ring_buffer_print_entry_header(struct trace_seq *s); +int ring_buffer_print_page_header(struct trace_seq *s); + enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; -- cgit v1.2.3 From 76aa81118ddfbb3dc31533030cf3ec329dd067a6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 16 Apr 2009 23:35:39 -0700 Subject: tracing: avoid warnings from zero-arg tracepoints Tracepoints with no arguments can issue two warnings: "field" defined by not used "ret" is uninitialized in this function Mark field as being OK to leave unused, and initialize ret. [ Impact: fix false positive compiler warnings. ] Signed-off-by: Jeremy Fitzhardinge Acked-by: Steven Rostedt Cc: mathieu.desnoyers@polymtl.ca LKML-Reference: <1239950139-1119-5-git-send-email-jeremy@goop.org> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 60c5323bee64..39a3351f2e7f 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -160,8 +160,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ static int \ ftrace_format_##call(struct trace_seq *s) \ { \ - struct ftrace_raw_##call field; \ - int ret; \ + struct ftrace_raw_##call field __attribute__((unused)); \ + int ret = 0; \ \ tstruct; \ \ -- cgit v1.2.3 From 46de405f25f1d9fa73b657ffbb752aa0cc87a91d Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 17 Apr 2009 10:53:43 +0800 Subject: tracing: Remove include/trace/kmem_event_types.h kmem_event_types.h is no longer necessary since tracepoint definitions are put into include/trace/events/kmem.h [ Impact: remove now-unused file. ] Signed-off-by: Zhao Lei Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49E7EF37.2080205@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/kmem_event_types.h | 193 --------------------------------------- 1 file changed, 193 deletions(-) delete mode 100644 include/trace/kmem_event_types.h (limited to 'include') diff --git a/include/trace/kmem_event_types.h b/include/trace/kmem_event_types.h deleted file mode 100644 index 4ff420fe4675..000000000000 --- a/include/trace/kmem_event_types.h +++ /dev/null @@ -1,193 +0,0 @@ - -/* use instead */ -#ifndef TRACE_EVENT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kmem - -TRACE_EVENT(kmalloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmem_cache_alloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmalloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kmem_cache_alloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kfree, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); - -TRACE_EVENT(kmem_cache_free, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); - -#undef TRACE_SYSTEM -- cgit v1.2.3 From b0afdc126d0515e76890f0a5f26b28501cfa298e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 17 Apr 2009 13:02:22 -0400 Subject: tracing/events: enable code with EVENT_TRACING not EVENT_TRACER The CONFIG_EVENT_TRACER is the way to turn on event tracing when no other tracing has been configured. All code to get enabled should depend on CONFIG_EVENT_TRACING. That is what is enabled when TRACING (or CONFIG_EVENT_TRACER) is selected. This patch enables the include/trace/ftrace.h file when CONFIG_EVENT_TRACING is enabled. [ Impact: fix warning in event tracer selftest ] Signed-off-by: Steven Rostedt --- include/trace/define_trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 18869417109c..7f1f23d601ec 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -56,7 +56,7 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#ifdef CONFIG_EVENT_TRACER +#ifdef CONFIG_EVENT_TRACING #include #endif -- cgit v1.2.3 From 261842b7c9099f56de2eb969c8ad65402d68e00e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 16 Apr 2009 21:41:52 -0400 Subject: tracing: add same level recursion detection The tracing infrastructure allows for recursion. That is, an interrupt may interrupt the act of tracing an event, and that interrupt may very well perform its own trace. This is a recursive trace, and is fine to do. The problem arises when there is a bug, and the utility doing the trace calls something that recurses back into the tracer. This recursion is not caused by an external event like an interrupt, but by code that is not expected to recurse. The result could be a lockup. This patch adds a bitmask to the task structure that keeps track of the trace recursion. To find the interrupt depth, the following algorithm is used: level = hardirq_count() + softirq_count() + in_nmi; Here, level will be the depth of interrutps and softirqs, and even handles the nmi. Then the corresponding bit is set in the recursion bitmask. If the bit was already set, we know we had a recursion at the same level and we warn about it and fail the writing to the buffer. After the data has been committed to the buffer, we clear the bit. No atomics are needed. The only races are with interrupts and they reset the bitmask before returning anywy. [ Impact: detect same irq level trace recursion ] Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 +++++++ include/linux/init_task.h | 1 + include/linux/sched.h | 4 +++- 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 97c83e1bc589..39b95c56587e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -488,8 +488,15 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) extern int ftrace_dump_on_oops; +#ifdef CONFIG_PREEMPT +#define INIT_TRACE_RECURSION .trace_recursion = 0, +#endif + #endif /* CONFIG_TRACING */ +#ifndef INIT_TRACE_RECURSION +#define INIT_TRACE_RECURSION +#endif #ifdef CONFIG_HW_BRANCH_TRACER diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dcfb93337e9a..6fc218529863 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -187,6 +187,7 @@ extern struct cred init_cred; INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ + INIT_TRACE_RECURSION \ } diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc8049c..7ede5e490913 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1428,7 +1428,9 @@ struct task_struct { #ifdef CONFIG_TRACING /* state flags for use by tracers */ unsigned long trace; -#endif + /* bitmask of trace recursion */ + unsigned long trace_recursion; +#endif /* CONFIG_TRACING */ }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ -- cgit v1.2.3 From 8e668b5b3455207e4540fc7ccab9ecf70142f288 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 17 Apr 2009 17:17:55 -0400 Subject: tracing: remove format attribute of inline function Due to a cut and paste error, I added the gcc attribute for printf format to the static inline stub of trace_seq_printf. This will cause a compile failure. [ Impact: fix compiler error when CONFIG_TRACING is off ] Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: =?ISO-8859-15?Q?Fr=E9d=E9ric_Weisbecker?= LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/trace_seq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 15ca2c71af13..37db9bdfbc1a 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -42,7 +42,6 @@ extern int trace_seq_path(struct trace_seq *s, struct path *path); #else /* CONFIG_TRACING */ static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))) { return 0; } -- cgit v1.2.3 From 46a802e852c2106d755f697819ea80cd3ffdc222 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide kill unused ide_cmd->special Impact: removal of unused field No one uses ide_cmd->special anymore. Kill it. Signed-off-by: Tejun Heo --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index ff65fffb078f..846a1e132407 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -324,7 +324,6 @@ struct ide_cmd { unsigned int cursg_ofs; struct request *rq; /* copy of request */ - void *special; /* valid_t generally */ }; /* ATAPI packet command flags */ -- cgit v1.2.3 From a1df5169f9bf08f6067029bfb840a05e282b1b97 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide: add helpers for preparing sense requests This is in preparation of removing the queueing of a sense request out of the IRQ handler path. Use struct request_sense as a general sense buffer for all ATAPI devices ide-{floppy,tape,cd}. tj: * blk_get_request(__GFP_WAIT) can't be called from do_request() as it can cause deadlock. Converted to use inline struct request and blk_rq_init(). * Added xfer / cdb len selection depending on device type. * All sense prep logics folded into ide_prep_sense() which never fails. * hwif->rq clearing and sense_rq used handling moved into ide_queue_sense_rq(). * blk_rq_map_kern() conversion is moved to later patch. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo --- include/linux/ide.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 846a1e132407..a69ccac56411 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -26,6 +26,9 @@ #include #include +/* for request_sense */ +#include + #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) # define SUPPORT_VLB_SYNC 0 #else @@ -602,6 +605,11 @@ struct ide_drive_s { struct ide_atapi_pc request_sense_pc; struct request request_sense_rq; + + /* current sense rq and buffer */ + bool sense_rq_armed; + struct request sense_rq; + struct request_sense sense_data; }; typedef struct ide_drive_s ide_drive_t; @@ -1175,6 +1183,9 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_prep_sense(ide_drive_t *drive, struct request *rq); +void ide_queue_sense_rq(ide_drive_t *drive, void *special); + int ide_cd_expiry(ide_drive_t *); int ide_cd_get_xferlen(struct request *); -- cgit v1.2.3 From 6b544fcc8cd0a04eb42de9d1ecdd345e979d6ada Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-atapi: convert ide-{floppy,tape} to using preallocated sense buffer Since we're issuing REQ_TYPE_SENSE now we need to allow those types of rqs in the ->do_request callbacks. As a future improvement, sense_len assignment might be unified across all ATAPI devices. Borislav to check with specs and test. As a result, get rid of ide_queue_pc_head() and drive->request_sense_rq. tj: * Init request sense ide_atapi_pc from sense request. In the longer timer, it would probably better to fold ide_create_request_sense_cmd() into its only current user - ide_floppy_get_format_progress(). * ide_retry_pc() no longer takes @disk. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo --- include/linux/ide.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index a69ccac56411..9e67ccac3c1f 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -604,7 +604,6 @@ struct ide_drive_s { unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; - struct request request_sense_rq; /* current sense rq and buffer */ bool sense_rq_armed; @@ -1181,7 +1180,7 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); -void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); void ide_queue_sense_rq(ide_drive_t *drive, void *special); -- cgit v1.2.3 From 5c4be57249e2e09136446597d2fe2a967c6ffef0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-cd,atapi: use bio for internal commands Impact: unify request data buffer handling rq->data is used mostly to pass kernel buffer through request queue without using bio. There are only a couple of places which still do this in kernel and converting to bio isn't difficult. This patch converts ide-cd and atapi to use bio instead of rq->data for request sense and internal pc commands. With previous change to unify sense request handling, this is relatively easily achieved by adding blk_rq_map_kern() during sense_rq prep and PC issue. If blk_rq_map_kern() fails for sense, the error is deferred till sense issue and aborts the failed command which triggered the sense. Note that this is a slim possibility as sense prep is done on each command issue, so for the above condition to actually trigger, all preps since the last sense issue till the issue of the request which would require a sense should fail. * do_request functions might sleep now. This should be okay as ide request_fn - do_ide_request() - is invoked only from make_request and plug work. Make sure this is the case by adding might_sleep() to do_ide_request(). * Functions which access the read sense data before the sense request is complete now should access bio_data(sense_rq->bio) as the sense buffer might have been copied during blk_rq_map_kern(). * ide-tape updated to map sg. * cdrom_do_block_pc() now doesn't have to deal with REQ_TYPE_ATA_PC special case. Simplified. * tp_ops->output/input_data path dropped from ide_pc_intr(). Signed-off-by: Tejun Heo --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 9e67ccac3c1f..1957461ac762 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1183,7 +1183,7 @@ void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); -void ide_queue_sense_rq(ide_drive_t *drive, void *special); +int ide_queue_sense_rq(ide_drive_t *drive, void *special); int ide_cd_expiry(ide_drive_t *); -- cgit v1.2.3 From 6d7003877c2f0578f1c08f66d05c3f72ef4ae596 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide-atapi: kill unused fields and callbacks Impact: remove fields and code paths which are no longer necessary Now that ide-tape uses standard mechanisms to transfer data, special case handling for bh handling can be dropped from ide-atapi. Drop the followings. * pc->cur_pos, b_count, bh and b_data * drive->pc_update_buffers() and pc_io_buffers(). Signed-off-by: Tejun Heo --- include/linux/ide.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 1957461ac762..34c128f0a33c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -362,11 +362,7 @@ struct ide_atapi_pc { /* data buffer */ u8 *buf; - /* current buffer position */ - u8 *cur_pos; int buf_size; - /* missing/available data on the current buffer */ - int b_count; /* the corresponding request */ struct request *rq; @@ -379,10 +375,6 @@ struct ide_atapi_pc { */ u8 pc_buf[IDE_PC_BUFFER_SIZE]; - /* idetape only */ - struct idetape_bh *bh; - char *b_data; - unsigned long timeout; }; @@ -595,10 +587,6 @@ struct ide_drive_s { /* callback for packet commands */ int (*pc_callback)(struct ide_drive_s *, int); - void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *); - int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, - unsigned int, int); - ide_startstop_t (*irq_handler)(struct ide_drive_s *); unsigned long atapi_flags; -- cgit v1.2.3 From 937582382c71b75b29fbb92615629494e1a05ac0 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Fri, 17 Apr 2009 16:42:14 +0800 Subject: x86, intr-remap: enable interrupt remapping early Currently, when x2apic is not enabled, interrupt remapping will be enabled in init_dmars(), where it is too late to remap ioapic interrupts, that is, ioapic interrupts are really in compatibility mode, not remappable mode. This patch always enables interrupt remapping before ioapic setup, it guarantees all interrupts will be remapped when interrupt remapping is enabled. Thus it doesn't need to set the compatibility interrupt bit. [ Impact: refactor intr-remap init sequence, enable fuller remap mode ] Signed-off-by: Suresh Siddha Signed-off-by: Weidong Han Acked-by: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: allen.m.kay@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1239957736-6161-4-git-send-email-weidong.han@intel.com> Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e397dc342cda..06f592a7f73c 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -108,6 +108,7 @@ struct irte { }; #ifdef CONFIG_INTR_REMAP extern int intr_remapping_enabled; +extern int intr_remapping_supported(void); extern int enable_intr_remapping(int); extern void disable_intr_remapping(void); extern int reenable_intr_remapping(int); -- cgit v1.2.3 From 23de29de2d8b227943be191d59fb6d983996d55e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2009 12:59:29 -0400 Subject: tracing: remove dangling semicolon Due to a cut and paste error, the trace_seq_putc had a semicolon after the prototype but before the stub function when tracing is disabled. [Impact: fix compile error ] Signed-off-by: Steven Rostedt --- include/linux/trace_seq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 37db9bdfbc1a..ba9627f00d3f 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -63,7 +63,7 @@ static inline int trace_seq_puts(struct trace_seq *s, const char *str) { return 0; } -static inline int trace_seq_putc(struct trace_seq *s, unsigned char c); +static inline int trace_seq_putc(struct trace_seq *s, unsigned char c) { return 0; } -- cgit v1.2.3 From b75576d76d4be50196773f36709cb7a4f5ac2ab7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Apr 2009 17:56:13 +0100 Subject: ASoC: Make the DAPM power check an operation on the widget Rather than having switch statements at point of use make the DAPM power check a member of the widget structure and set it when we instantiate the widget. Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index fcc929da0339..839a97b63269 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -367,6 +367,8 @@ struct snd_soc_dapm_widget { unsigned char suspend:1; /* was active before suspend */ unsigned char pmdown:1; /* waiting for timeout */ + int (*power_check)(struct snd_soc_dapm_widget *w); + /* external events */ unsigned short event_flags; /* flags to specify event types */ int (*event)(struct snd_soc_dapm_widget*, struct snd_kcontrol *, int); -- cgit v1.2.3 From f1f9b3b1795da8625e0e6096813c9d18d4a344ce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 20:38:21 +0200 Subject: perfcounters, sched: remove __task_delta_exec() This function was left orphan by the latest round of sw-counter cleanups. [ Impact: remove unused kernel function ] Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 080d1fd461d7..a77c6007dc99 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -85,7 +85,6 @@ static inline unsigned int kstat_irqs(unsigned int irq) /* * Lock/unlock the current runqueue - to extract task statistics: */ -extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update); extern unsigned long long task_delta_exec(struct task_struct *); extern void account_user_time(struct task_struct *, cputime_t, cputime_t); -- cgit v1.2.3 From cd474f2d548af3c0eb932d9d47ec11483861aa6f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Apr 2009 08:53:08 +0200 Subject: ALSA: Remove deprecated snd_card_new() Signed-off-by: Takashi Iwai --- include/sound/core.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 3dea79829acc..a26bbdcc6765 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -300,16 +300,6 @@ int snd_card_create(int idx, const char *id, struct module *module, int extra_size, struct snd_card **card_ret); -static inline __deprecated -struct snd_card *snd_card_new(int idx, const char *id, - struct module *module, int extra_size) -{ - struct snd_card *card; - if (snd_card_create(idx, id, module, extra_size, &card) < 0) - return NULL; - return card; -} - int snd_card_disconnect(struct snd_card *card); int snd_card_free(struct snd_card *card); int snd_card_free_when_closed(struct snd_card *card); -- cgit v1.2.3 From ef9dfa4b1052af23a603de382d4665b2d1fccc61 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Apr 2009 08:53:41 +0200 Subject: ALSA: Remove deprecated include/sound/driver.h Signed-off-by: Takashi Iwai --- include/sound/driver.h | 1 - 1 file changed, 1 deletion(-) delete mode 100644 include/sound/driver.h (limited to 'include') diff --git a/include/sound/driver.h b/include/sound/driver.h deleted file mode 100644 index f0359437d01a..000000000000 --- a/include/sound/driver.h +++ /dev/null @@ -1 +0,0 @@ -#warning "This file is deprecated" -- cgit v1.2.3 From fc1edaf9e7cc4d4696f83dee495b8f158d01c4eb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 20 Apr 2009 13:02:27 -0700 Subject: x86: x2apic, IR: Clean up X86_X2APIC and INTR_REMAP config checks Add x2apic_supported() to clean up CONFIG_X86_X2APIC checks. Fix CONFIG_INTR_REMAP checks. [ Impact: cleanup ] Signed-off-by: Suresh Siddha Cc: dwmw2@infradead.org Cc: Suresh Siddha Cc: Weidong Han LKML-Reference: <20090420200450.128993000@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 06f592a7f73c..10ff5c498824 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -158,6 +158,8 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic) } #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) +#define disable_intr_remapping() (0) +#define reenable_intr_remapping(mode) (0) #define intr_remapping_enabled (0) #endif -- cgit v1.2.3 From 7a4f453b6d7379a7c380825949977c5a838aa012 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 22 Apr 2009 16:53:34 +0800 Subject: tracing/events: make struct trace_entry->type to be int type struct trace_entry->type is unsigned char, while trace event's id is int type, thus for a event with id >= 256, it's entry->type is cast to (id % 256), and then we can't see the trace output of this event. # insmod trace-events-sample.ko # echo foo_bar > /mnt/tracing/set_event # cat /debug/tracing/events/trace-events-sample/foo_bar/id 256 # cat /mnt/tracing/trace_pipe <...>-3548 [001] 215.091142: Unknown type 0 <...>-3548 [001] 216.089207: Unknown type 0 <...>-3548 [001] 217.087271: Unknown type 0 <...>-3548 [001] 218.085332: Unknown type 0 [ Impact: fix output for trace events with id >= 256 ] Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <49EEDB0E.5070207@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 4 ++-- include/trace/ftrace.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 75f3ac01a87c..2a4a40749911 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -16,7 +16,7 @@ struct dentry; * bash-15816 [01] 235.197585: idle_cpu <- irq_enter */ struct trace_entry { - unsigned char type; + int type; unsigned char flags; unsigned char preempt_count; int pid; @@ -73,7 +73,7 @@ enum print_line_t { struct ring_buffer_event * -trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, +trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc); void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 39a3351f2e7f..15ef08d9add1 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -198,7 +198,7 @@ ftrace_define_fields_##call(void) \ struct ftrace_event_call *event_call = &event_##call; \ int ret; \ \ - __common_field(unsigned char, type); \ + __common_field(int, type); \ __common_field(unsigned char, flags); \ __common_field(unsigned char, preempt_count); \ __common_field(int, pid); \ -- cgit v1.2.3 From 9cbf117662e24c6d33245666804487f92c21b59d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 19 Apr 2009 04:51:29 +0200 Subject: tracing/events: provide string with undefined size support This patch provides the support for dynamic size strings on event tracing. The key concept is to use a structure with an ending char array field of undefined size and use such ability to allocate the minimal size on the ring buffer to make one or more string entries fit inside, as opposite to a fixed length strings with upper bound. The strings themselves are represented using fields which have an offset value from the beginning of the entry. This patch provides three new macros: __string(item, src) This one declares a string to the structure inside TP_STRUCT__entry. You need to provide the name of the string field and the source that will be copied inside. This will also add the dynamic size of the string needed for the ring buffer entry allocation. A stack allocated structure is used to temporarily store the offset of each strings, avoiding double calls to strlen() on each event insertion. __get_str(field) This one will give you a pointer to the string you have created. This is an abstract helper to resolve the absolute address given the field name which is a relative address from the beginning of the trace_structure. __assign_str(dst, src) Use this macro to automatically perform the string copy from src to dst. src must be a variable to assign and dst is the name of a __string field. Example on how to use it: TRACE_EVENT(my_event, TP_PROTO(char *src1, char *src2), TP_ARGS(src1, src2), TP_STRUCT__entry( __string(str1, src1) __string(str2, src2) ), TP_fast_assign( __assign_str(str1, src1); __assign_str(str2, src2); ), TP_printk("%s %s", __get_str(src1), __get_str(src2)) ) Of course you can mix-up any __field or __array inside this TRACE_EVENT. The position of the __string or __assign_str doesn't matter. Changes in v2: Address the suggestion of Steven Rostedt: drop the opening_string() macro and redefine __ending_string() to get the size of the string to be copied instead of overwritting the whole ring buffer allocation. Changes in v3: Address other suggestions of Steven Rostedt and Peter Zijlstra with some changes: drop the __ending_string and the need to have only one string field. Use offsets instead of absolute addresses. [ Impact: allow more compact memory usage for string tracing ] Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Li Zefan Cc: Peter Zijlstra --- include/trace/ftrace.h | 88 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 15ef08d9add1..5a7d18c43634 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -27,6 +27,9 @@ #undef __field #define __field(type, item) type item; +#undef __string +#define __string(item, src) int __str_loc_##item; + #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args @@ -35,14 +38,53 @@ struct ftrace_raw_##name { \ struct trace_entry ent; \ tstruct \ + char __str_data[0]; \ }; \ static struct ftrace_event_call event_##name #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + /* * Stage 2 of the trace events. * + * Include the following: + * + * struct ftrace_str_offsets_ { + * int ; + * int ; + * [...] + * }; + * + * The __string() macro will create each int , this is to + * keep the offset of each string from the beggining of the event + * once we perform the strlen() of the src strings. + * + */ + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) + +#undef __array +#define __array(type, item, len) + +#undef __field +#define __field(type, item); + +#undef __string +#define __string(item, src) int item; + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ + struct ftrace_str_offsets_##call { \ + tstruct; \ + }; + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 3 of the trace events. + * * Override the macros in to include the following: * * enum print_line_t @@ -80,6 +122,9 @@ #undef TP_printk #define TP_printk(fmt, args...) fmt "\n", args +#undef __get_str +#define __get_str(field) (char *)__entry + __entry->__str_loc_##field + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ enum print_line_t \ @@ -146,6 +191,16 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ if (!ret) \ return 0; +#undef __string +#define __string(item, src) \ + ret = trace_seq_printf(s, "\tfield: __str_loc " #item ";\t" \ + "offset:%u;tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), \ + __str_loc_##item), \ + (unsigned int)sizeof(field.__str_loc_##item)); \ + if (!ret) \ + return 0; + #undef __entry #define __entry REC @@ -189,6 +244,12 @@ ftrace_format_##call(struct trace_seq *s) \ if (ret) \ return ret; +#undef __string +#define __string(item, src) \ + ret = trace_define_field(event_call, "__str_loc", #item, \ + offsetof(typeof(field), __str_loc_##item), \ + sizeof(field.__str_loc_##item)); + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ int \ @@ -212,7 +273,7 @@ ftrace_define_fields_##call(void) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* - * Stage 3 of the trace events. + * Stage 4 of the trace events. * * Override the macros in to include the following: * @@ -409,6 +470,23 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __entry #define __entry entry +#undef __field +#define __field(type, item) + +#undef __array +#define __array(type, item, len) + +#undef __string +#define __string(item, src) \ + __str_offsets.item = __str_size + \ + offsetof(typeof(*entry), __str_data); \ + __str_size += strlen(src) + 1; + +#undef __assign_str +#define __assign_str(dst, src) \ + __entry->__str_loc_##dst = __str_offsets.dst; \ + strcpy(__get_str(dst), src); + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ _TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ @@ -417,18 +495,22 @@ static struct ftrace_event_call event_##call; \ \ static void ftrace_raw_event_##call(proto) \ { \ + struct ftrace_str_offsets_##call __maybe_unused __str_offsets; \ struct ftrace_event_call *call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ unsigned long irq_flags; \ + int __str_size = 0; \ int pc; \ \ local_save_flags(irq_flags); \ pc = preempt_count(); \ \ + tstruct; \ + \ event = trace_current_buffer_lock_reserve(event_##call.id, \ - sizeof(struct ftrace_raw_##call), \ - irq_flags, pc); \ + sizeof(struct ftrace_raw_##call) + __str_size,\ + irq_flags, pc); \ if (!event) \ return; \ entry = ring_buffer_event_data(event); \ -- cgit v1.2.3 From 7e7ca9a22dbbc5c91763cd16923c7509918709b6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 19 Apr 2009 04:54:49 +0200 Subject: tracing/lock: provide lock_acquired event support for dynamic size string Now that we can support the dynamic sized string, make the lock tracing able to use it, making it safe against modules removal and consuming the right amount of memory needed for each lock name Changes in v2: adapt to the __ending_string() updates and the opening_string() removal. [ Impact: protect lock tracer against module removal ] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Steven Rostedt --- include/trace/events/lockdep.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h index 45e326b5c7f3..3ca315c1429d 100644 --- a/include/trace/events/lockdep.h +++ b/include/trace/events/lockdep.h @@ -38,16 +38,16 @@ TRACE_EVENT(lock_acquired, TP_ARGS(lock, ip, waittime), TP_STRUCT__entry( - __field(const char *, name) + __string(name, lock->name) __field(unsigned long, wait_usec) __field(unsigned long, wait_nsec_rem) ), TP_fast_assign( - __entry->name = lock->name; + __assign_str(name, lock->name); __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); __entry->wait_usec = (unsigned long) waittime; ), - TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, __entry->wait_nsec_rem) ); -- cgit v1.2.3 From 6a74aa40907757ec98d8710ff66cd4cfe064e7d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 22 Apr 2009 00:41:09 +0200 Subject: tracing/events: protect __get_str() The __get_str() macro is used in a code part then its content should be protected with parenthesis. [ Impact: make macro definition more robust ] Reported-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 5a7d18c43634..a77f71a46dbe 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -123,7 +123,7 @@ #define TP_printk(fmt, args...) fmt "\n", args #undef __get_str -#define __get_str(field) (char *)__entry + __entry->__str_loc_##field +#define __get_str(field) ((char *)__entry + __entry->__str_loc_##field) #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -- cgit v1.2.3 From 246d0a17f5e09af0794960164269fc8988a8811c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Apr 2009 18:24:55 +0100 Subject: ASoC: Add power supply widget to DAPM Many modern CODECs have shared resources on chip which must be enabled for portions of the chip to work but which can be disabled at other times in order to achieve power savings. Examples of such resources include power supplies and some internal clocks. Since these widgets are dependencies for the audio path but do not carry audio signals they require slightly different handling to most widgets - they do not contribute to the audio path and so should not be counted as either inputs or outputs during path walks. Cases where one supply provides a supply for another will require additional work. There is also room for more optimisation of the graph walking to avoid repeated checks for the same thing. Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 839a97b63269..533f9f256496 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -154,12 +154,16 @@ .shift = wshift, .invert = winvert, \ .event = wevent, .event_flags = wflags} -/* generic register modifier widget */ +/* generic widgets */ #define SND_SOC_DAPM_REG(wid, wname, wreg, wshift, wmask, won_val, woff_val) \ { .id = wid, .name = wname, .kcontrols = NULL, .num_kcontrols = 0, \ .reg = -((wreg) + 1), .shift = wshift, .mask = wmask, \ .on_val = won_val, .off_val = woff_val, .event = dapm_reg_event, \ .event_flags = SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD} +#define SND_SOC_DAPM_SUPPLY(wname, wreg, wshift, winvert, wevent, wflags) \ +{ .id = snd_soc_dapm_supply, .name = wname, .reg = wreg, \ + .shift = wshift, .invert = winvert, .event = wevent, \ + .event_flags = wflags} /* dapm kcontrol types */ #define SOC_DAPM_SINGLE(xname, reg, shift, max, invert) \ @@ -308,6 +312,7 @@ enum snd_soc_dapm_type { snd_soc_dapm_vmid, /* codec bias/vmid - to minimise pops */ snd_soc_dapm_pre, /* machine specific pre widget - exec first */ snd_soc_dapm_post, /* machine specific post widget - exec last */ + snd_soc_dapm_supply, /* power/clock supply */ }; /* -- cgit v1.2.3 From 89ec0dee9eba6275d47be0b878cf5f6d5c2fb6eb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 26 Mar 2009 11:03:29 -0400 Subject: tracing: increase size of number of possible events With the new event tracing registration, we must increase the number of events that can be registered. Currently the type field is only one byte, which leaves us only 256 possible events. Since we do not save the CPU number in the tracer anymore (it is determined by the per cpu ring buffer that is used) we have an extra byte to use. This patch increases the size of type from 1 byte (256 events) to 2 bytes (65,536 events). It also adds a WARN_ON_ONCE if we exceed that limit. [ Impact: allow more than 255 events ] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 2a4a40749911..07e0a6d64a24 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -16,13 +16,16 @@ struct dentry; * bash-15816 [01] 235.197585: idle_cpu <- irq_enter */ struct trace_entry { - int type; + unsigned short type; unsigned char flags; unsigned char preempt_count; int pid; int tgid; }; +#define FTRACE_MAX_EVENT \ + ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1) + /* * Trace iterator - used by printout routines who present trace * results to users and which routines might sleep, etc: -- cgit v1.2.3 From c2518c4366f087ebc10b3919cb2461bbe4f42d0c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Apr 2009 23:26:18 -0400 Subject: tracing: fix cut and paste macro error In case a module uses the TRACE_EVENT macro for creating automated events in ftrace, it may choose to use a different file name than the defined system name, or choose to use a different path than the default "include/trace/events" include path. If this is done, then before including trace/define_trace.h the header would define either "TRACE_INCLUDE_FILE" for the file name or "TRACE_INCLUDE_PATH" for the include path. If it does not define these, then the define_trace.h defines them instead. If define trace defines them, then define_trace.h should also undefine them before exiting. To do this a macro is used to note this: #ifndef TRACE_INCLUDE_FILE # define TRACE_INCLUDE_FILE TRACE_SYSTEM # define UNDEF_TRACE_INCLUDE_FILE #endif [...] #ifdef UNDEF_TRACE_INCLUDE_FILE # undef TRACE_INCLUDE_FILE # undef UNDEF_TRACE_INCLUDE_FILE #endif The UNDEF_TRACE_INCLUDE_FILE acts as a CPP variable to know to undef the TRACE_INCLUDE_FILE before leaving define_trace.h. Unfortunately, due to cut and paste errors, the macros between FILE and PATH got mixed up. [ Impact: undef TRACE_INCLUDE_FILE and/or TRACE_INCLUDE_PATH when needed ] Signed-off-by: Steven Rostedt --- include/trace/define_trace.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 7f1f23d601ec..abc611feeb8c 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -44,7 +44,7 @@ #ifndef TRACE_INCLUDE_PATH # define __TRACE_INCLUDE(system) -# define UNDEF_TRACE_INCLUDE_FILE +# define UNDEF_TRACE_INCLUDE_PATH #else # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) #endif @@ -64,13 +64,13 @@ /* Only undef what we defined in this file */ #ifdef UNDEF_TRACE_INCLUDE_FILE -# undef TRACE_INCLUDE_PATH +# undef TRACE_INCLUDE_FILE # undef UNDEF_TRACE_INCLUDE_FILE #endif -#ifdef UNDEF_TRACE_INCLUDE_FILE +#ifdef UNDEF_TRACE_INCLUDE_PATH # undef TRACE_INCLUDE_PATH -# undef UNDEF_TRACE_INCLUDE_FILE +# undef UNDEF_TRACE_INCLUDE_PATH #endif /* We may be processing more files */ -- cgit v1.2.3 From 334d4169a6592d3fcd863bbe822a8f6985ffa9af Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 24 Apr 2009 11:27:05 +0800 Subject: ring_buffer: compressed event header RB_MAX_SMALL_DATA = 28bytes is too small for most tracers, it wastes an 'u32' to save the actually length for events which data size > 28. This fix uses compressed event header and enlarges RB_MAX_SMALL_DATA. [ Impact: saves about 0%-12.5%(depends on tracer) memory in ring_buffer ] Signed-off-by: Lai Jiangshan LKML-Reference: <49F13189.3090000@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index fac8f1ac6f49..1c2f80911fbe 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -11,7 +11,7 @@ struct ring_buffer_iter; * Don't refer to this struct directly, use functions below. */ struct ring_buffer_event { - u32 type:2, len:3, time_delta:27; + u32 type_len:5, time_delta:27; u32 array[]; }; @@ -24,7 +24,8 @@ struct ring_buffer_event { * size is variable depending on how much * padding is needed * If time_delta is non zero: - * everything else same as RINGBUF_TYPE_DATA + * array[0] holds the actual length + * size = 4 + length (bytes) * * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta * array[0] = time delta (28 .. 59) @@ -35,22 +36,23 @@ struct ring_buffer_event { * array[1..2] = tv_sec * size = 16 bytes * - * @RINGBUF_TYPE_DATA: Data record - * If len is zero: + * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: + * Data record + * If type_len is zero: * array[0] holds the actual length * array[1..(length+3)/4] holds data - * size = 4 + 4 + length (bytes) + * size = 4 + length (bytes) * else - * length = len << 2 + * length = type_len << 2 * array[0..(length+3)/4-1] holds data * size = 4 + length (bytes) */ enum ring_buffer_type { + RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, RINGBUF_TYPE_PADDING, RINGBUF_TYPE_TIME_EXTEND, /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ RINGBUF_TYPE_TIME_STAMP, - RINGBUF_TYPE_DATA, }; unsigned ring_buffer_event_length(struct ring_buffer_event *event); -- cgit v1.2.3 From 1cb81b143fa8f0e4629f10690862e2e52ca792ff Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 24 Apr 2009 09:51:43 +0200 Subject: x86, bts, mm: clean up buffer allocation The current mm interface is asymetric. One function allocates a locked buffer, another function only refunds the memory. Change this to have two functions for accounting and refunding locked memory, respectively; and do the actual buffer allocation in ptrace. [ Impact: refactor BTS buffer allocation code ] Signed-off-by: Markus Metzger Acked-by: Andrew Morton Cc: Peter Zijlstra LKML-Reference: <20090424095143.A30265@sedona.ch.intel.com> Signed-off-by: Ingo Molnar --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a3963ba23a6d..009eabd3c21c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -19,6 +19,7 @@ struct anon_vma; struct file_ra_state; struct user_struct; struct writeback_control; +struct rlimit; #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; @@ -1319,7 +1320,8 @@ int vmemmap_populate_basepages(struct page *start_page, int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); -extern void *alloc_locked_buffer(size_t size); -extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size); +extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, + size_t size); +extern void refund_locked_memory(struct mm_struct *mm, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 39517091f88fae32b52254b561ced78da1eaf0a7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 11:05:52 -0400 Subject: tracing/lockdep: convert lockdep to use TRACE_EVENT macro The TRACE_FORMAT will soon be deprecated. This patch converts it to the TRACE_EVENT macro. Note, this change should also speed up the tracing. [ Impact: remove a user of deprecated TRACE_FORMAT ] Cc: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/trace/events/lockdep.h | 56 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h index 3ca315c1429d..0e956c9dfd7e 100644 --- a/include/trace/events/lockdep.h +++ b/include/trace/events/lockdep.h @@ -9,28 +9,64 @@ #ifdef CONFIG_LOCKDEP -TRACE_FORMAT(lock_acquire, +TRACE_EVENT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); -TRACE_FORMAT(lock_release, + TP_STRUCT__entry( + __field(unsigned int, flags) + __string(name, lock->name) + ), + + TP_fast_assign( + __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); + __assign_str(name, lock->name); + ), + + TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", + (__entry->flags & 2) ? "read " : "", + __get_str(name)) +); + +TRACE_EVENT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); #ifdef CONFIG_LOCK_STAT -TRACE_FORMAT(lock_contended, +TRACE_EVENT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); TRACE_EVENT(lock_acquired, TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), -- cgit v1.2.3 From 160031b556e93590fa8635210d73d93c3d3853a9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 11:26:55 -0400 Subject: tracing/irq: convert irq traces to use TRACE_EVENT macro The TRACE_FORMAT will soon be deprecated. This patch converts it to the TRACE_EVENT macro. Note, this change should also speed up the tracing. [ Impact: remove a user of deprecated TRACE_FORMAT ] Cc: Jason Baron Signed-off-by: Steven Rostedt --- include/trace/events/irq.h | 57 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 75e3468e4493..768686467518 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -10,11 +10,24 @@ /* * Tracepoint for entry of interrupt handler: */ -TRACE_FORMAT(irq_handler_entry, +TRACE_EVENT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); + + TP_STRUCT__entry( + __field( int, irq ) + __string( name, action->name ) + ), + + TP_fast_assign( + __entry->irq = irq; + __assign_str(name, action->name); + ), + + TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) +); /* * Tracepoint for return of an interrupt handler: @@ -39,17 +52,43 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? "handled" : "unhandled") ); -TRACE_FORMAT(softirq_entry, +TRACE_EVENT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); -TRACE_FORMAT(softirq_exit, + TP_STRUCT__entry( + __field( int, vec ) + __string( name, softirq_to_name[h-vec] ) + ), + + TP_fast_assign( + __entry->vec = (int)(h - vec); + __assign_str(name, softirq_to_name[h-vec]); + ), + + TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) +); + +TRACE_EVENT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); + + TP_STRUCT__entry( + __field( int, vec ) + __string( name, softirq_to_name[h-vec] ) + ), + + TP_fast_assign( + __entry->vec = (int)(h - vec); + __assign_str(name, softirq_to_name[h-vec]); + ), + + TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) +); #endif /* _TRACE_IRQ_H */ -- cgit v1.2.3 From b8e65554d80b4c560d201362d0e8fa02109d89fd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 11:50:39 -0400 Subject: tracing: remove deprecated TRACE_FORMAT The TRACE_FORMAT macro has been deprecated by the TRACE_EVENT macro. There are no more users. All new users must use the TRACE_EVENT macro. [ Impact: remove old functionality ] Cc: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 5 ---- include/trace/define_trace.h | 4 --- include/trace/ftrace.h | 66 -------------------------------------------- 3 files changed, 75 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 4353f3f7e624..14df7e635d43 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -158,11 +158,6 @@ static inline void tracepoint_synchronize_unregister(void) #define PARAMS(args...) args -#ifndef TRACE_FORMAT -#define TRACE_FORMAT(name, proto, args, fmt) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#endif - #ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index abc611feeb8c..f7a7ae1e8f90 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -26,10 +26,6 @@ #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ DEFINE_TRACE(name) -#undef TRACE_FORMAT -#define TRACE_FORMAT(name, proto, args, print) \ - DEFINE_TRACE(name) - #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a77f71a46dbe..1e681142f1da 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,9 +18,6 @@ #include -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) - #undef __array #define __array(type, item, len) type item[len]; @@ -62,9 +59,6 @@ * */ -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) - #undef __array #define __array(type, item, len) @@ -298,16 +292,6 @@ ftrace_define_fields_##call(void) \ * unregister_trace_(ftrace_event_); * } * - * For those macros defined with TRACE_FORMAT: - * - * static struct ftrace_event_call __used - * __attribute__((__aligned__(4))) - * __attribute__((section("_ftrace_events"))) event_ = { - * .name = "", - * .regfunc = ftrace_reg_event_, - * .unregfunc = ftrace_unreg_event_, - * } - * * * For those macros defined with TRACE_EVENT: * @@ -417,56 +401,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ #define _TRACE_PROFILE_INIT(call) #endif -#define _TRACE_FORMAT(call, proto, args, fmt) \ -static void ftrace_event_##call(proto) \ -{ \ - event_trace_printk(_RET_IP_, #call ": " fmt); \ -} \ - \ -static int ftrace_reg_event_##call(void) \ -{ \ - int ret; \ - \ - ret = register_trace_##call(ftrace_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ - return ret; \ -} \ - \ -static void ftrace_unreg_event_##call(void) \ -{ \ - unregister_trace_##call(ftrace_event_##call); \ -} \ - \ -static struct ftrace_event_call event_##call; \ - \ -static int ftrace_init_event_##call(void) \ -{ \ - int id; \ - \ - id = register_ftrace_event(NULL); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - return 0; \ -} - -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) \ -_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ -static struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_init_event_##call, \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ - _TRACE_PROFILE_INIT(call) \ -} - #undef __entry #define __entry entry -- cgit v1.2.3 From 7629ad24f2b3df95c8b4cd8869e3c04e1df6c442 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 24 Apr 2009 16:37:44 +0200 Subject: ASoC: add SOC_DOUBLE_EXT macro Add a macro for double controls with special callback functions. Signed-off-by: Daniel Mack Signed-off-by: Mark Brown --- include/sound/soc.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index b1f2f8819fea..6ab80bf7abd2 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -118,6 +118,14 @@ .info = snd_soc_info_volsw, \ .get = xhandler_get, .put = xhandler_put, \ .private_value = SOC_SINGLE_VALUE(xreg, xshift, xmax, xinvert) } +#define SOC_DOUBLE_EXT(xname, xreg, shift_left, shift_right, xmax, xinvert,\ + xhandler_get, xhandler_put) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname),\ + .info = snd_soc_info_volsw, \ + .get = xhandler_get, .put = xhandler_put, \ + .private_value = (unsigned long)&(struct soc_mixer_control) \ + {.reg = xreg, .shift = shift_left, .rshift = shift_right, \ + .max = xmax, .invert = xinvert} } #define SOC_SINGLE_EXT_TLV(xname, xreg, xshift, xmax, xinvert,\ xhandler_get, xhandler_put, tlv_array) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ -- cgit v1.2.3 From 060fa5c83e67901ba47ab484cfcdb32737d630ba Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 12:20:52 -0400 Subject: tracing/events: reuse trace event ids after overflow With modules being able to add trace events, and the max trace event counter is 16 bits (65536) we can overflow the counter easily with a simple while loop adding and removing modules that contain trace events. This patch links together the registered trace events and on overflow searches for available trace event ids. It will still fail if over 65536 events are registered, but considering that a typical kernel only has 22000 functions, 65000 events should be sufficient. Reported-by: Li Zefan Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 07e0a6d64a24..78a9ba24cbf6 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -56,6 +56,7 @@ typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, int flags); struct trace_event { struct hlist_node node; + struct list_head list; int type; trace_print_func trace; trace_print_func raw; -- cgit v1.2.3 From e686307fdc84f249490e6c9da92fcb2424491f14 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 17 Apr 2009 08:41:21 +0200 Subject: loop: use BIO list management functions Now that the bio list management stuff is generic, convert loop to use bio lists instead of its own private bio list implementation. Cc: Jens Axboe Cc: Christoph Hellwig Signed-off-by: Akinobu Mita Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- include/linux/loop.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7b214fd672a2..f37ca8c726ba 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -506,7 +506,7 @@ static inline int bio_has_data(struct bio *bio) } /* - * BIO list managment for use by remapping drivers (e.g. DM or MD). + * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * * A bio_list anchors a singly-linked list of bios chained through the bi_next * member of the bio. The bio_list also caches the last list member to allow diff --git a/include/linux/loop.h b/include/linux/loop.h index 40725447f5e0..66c194e2d9b9 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -56,8 +56,7 @@ struct loop_device { gfp_t old_gfp_mask; spinlock_t lo_lock; - struct bio *lo_bio; - struct bio *lo_biotail; + struct bio_list lo_bio_list; int lo_state; struct mutex lo_ctl_mutex; struct task_struct *lo_thread; -- cgit v1.2.3 From 214ae19104141404f18ad6651752eb38e3dd584e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide kill unused ide_cmd->special Impact: removal of unused field No one uses ide_cmd->special anymore. Kill it. Signed-off-by: Tejun Heo --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index ff65fffb078f..846a1e132407 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -324,7 +324,6 @@ struct ide_cmd { unsigned int cursg_ofs; struct request *rq; /* copy of request */ - void *special; /* valid_t generally */ }; /* ATAPI packet command flags */ -- cgit v1.2.3 From e69d800f7e8797a8e3423380ee9d8ca1cb90c388 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide: add helpers for preparing sense requests This is in preparation of removing the queueing of a sense request out of the IRQ handler path. Use struct request_sense as a general sense buffer for all ATAPI devices ide-{floppy,tape,cd}. tj: * blk_get_request(__GFP_WAIT) can't be called from do_request() as it can cause deadlock. Converted to use inline struct request and blk_rq_init(). * Added xfer / cdb len selection depending on device type. * All sense prep logics folded into ide_prep_sense() which never fails. * hwif->rq clearing and sense_rq used handling moved into ide_queue_sense_rq(). * blk_rq_map_kern() conversion is moved to later patch. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo --- include/linux/ide.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 846a1e132407..a69ccac56411 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -26,6 +26,9 @@ #include #include +/* for request_sense */ +#include + #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) # define SUPPORT_VLB_SYNC 0 #else @@ -602,6 +605,11 @@ struct ide_drive_s { struct ide_atapi_pc request_sense_pc; struct request request_sense_rq; + + /* current sense rq and buffer */ + bool sense_rq_armed; + struct request sense_rq; + struct request_sense sense_data; }; typedef struct ide_drive_s ide_drive_t; @@ -1175,6 +1183,9 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_prep_sense(ide_drive_t *drive, struct request *rq); +void ide_queue_sense_rq(ide_drive_t *drive, void *special); + int ide_cd_expiry(ide_drive_t *); int ide_cd_get_xferlen(struct request *); -- cgit v1.2.3 From 068753203e6cd085664a62e0fc0636e19b148a12 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-atapi: convert ide-{floppy,tape} to using preallocated sense buffer Since we're issuing REQ_TYPE_SENSE now we need to allow those types of rqs in the ->do_request callbacks. As a future improvement, sense_len assignment might be unified across all ATAPI devices. Borislav to check with specs and test. As a result, get rid of ide_queue_pc_head() and drive->request_sense_rq. tj: * Init request sense ide_atapi_pc from sense request. In the longer timer, it would probably better to fold ide_create_request_sense_cmd() into its only current user - ide_floppy_get_format_progress(). * ide_retry_pc() no longer takes @disk. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo --- include/linux/ide.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index a69ccac56411..9e67ccac3c1f 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -604,7 +604,6 @@ struct ide_drive_s { unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; - struct request request_sense_rq; /* current sense rq and buffer */ bool sense_rq_armed; @@ -1181,7 +1180,7 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); -void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); void ide_queue_sense_rq(ide_drive_t *drive, void *special); -- cgit v1.2.3 From 02e7cf8f848841ca21864ccd019e480b73c323b7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-cd,atapi: use bio for internal commands Impact: unify request data buffer handling rq->data is used mostly to pass kernel buffer through request queue without using bio. There are only a couple of places which still do this in kernel and converting to bio isn't difficult. This patch converts ide-cd and atapi to use bio instead of rq->data for request sense and internal pc commands. With previous change to unify sense request handling, this is relatively easily achieved by adding blk_rq_map_kern() during sense_rq prep and PC issue. If blk_rq_map_kern() fails for sense, the error is deferred till sense issue and aborts the failed command which triggered the sense. Note that this is a slim possibility as sense prep is done on each command issue, so for the above condition to actually trigger, all preps since the last sense issue till the issue of the request which would require a sense should fail. * do_request functions might sleep now. This should be okay as ide request_fn - do_ide_request() - is invoked only from make_request and plug work. Make sure this is the case by adding might_sleep() to do_ide_request(). * Functions which access the read sense data before the sense request is complete now should access bio_data(sense_rq->bio) as the sense buffer might have been copied during blk_rq_map_kern(). * ide-tape updated to map sg. * cdrom_do_block_pc() now doesn't have to deal with REQ_TYPE_ATA_PC special case. Simplified. * tp_ops->output/input_data path dropped from ide_pc_intr(). Signed-off-by: Tejun Heo --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 9e67ccac3c1f..1957461ac762 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1183,7 +1183,7 @@ void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); -void ide_queue_sense_rq(ide_drive_t *drive, void *special); +int ide_queue_sense_rq(ide_drive_t *drive, void *special); int ide_cd_expiry(ide_drive_t *); -- cgit v1.2.3 From 29d1a4371035e01b0d079bc5aa88b50f5af7a566 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide-atapi: kill unused fields and callbacks Impact: remove fields and code paths which are no longer necessary Now that ide-tape uses standard mechanisms to transfer data, special case handling for bh handling can be dropped from ide-atapi. Drop the followings. * pc->cur_pos, b_count, bh and b_data * drive->pc_update_buffers() and pc_io_buffers(). Signed-off-by: Tejun Heo --- include/linux/ide.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 1957461ac762..34c128f0a33c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -362,11 +362,7 @@ struct ide_atapi_pc { /* data buffer */ u8 *buf; - /* current buffer position */ - u8 *cur_pos; int buf_size; - /* missing/available data on the current buffer */ - int b_count; /* the corresponding request */ struct request *rq; @@ -379,10 +375,6 @@ struct ide_atapi_pc { */ u8 pc_buf[IDE_PC_BUFFER_SIZE]; - /* idetape only */ - struct idetape_bh *bh; - char *b_data; - unsigned long timeout; }; @@ -595,10 +587,6 @@ struct ide_drive_s { /* callback for packet commands */ int (*pc_callback)(struct ide_drive_s *, int); - void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *); - int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, - unsigned int, int); - ide_startstop_t (*irq_handler)(struct ide_drive_s *); unsigned long atapi_flags; -- cgit v1.2.3 From a7f557923441186a3cdbabc54f1bcacf42b63bf5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:17 +0900 Subject: block: kill blk_start_queueing() blk_start_queueing() is identical to __blk_run_queue() except that it doesn't check for recursion. None of the current users depends on blk_start_queueing() running request_fn directly. Replace usages of blk_start_queueing() with [__]blk_run_queue() and kill it. [ Impact: removal of mostly duplicate interface function ] Signed-off-by: Tejun Heo --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2755d5c6da22..12e20de44b60 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -797,7 +797,6 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); -extern void blk_start_queueing(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); -- cgit v1.2.3 From 5efccd17ceb0fc43837a331297c2c407969d7201 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: reorder request completion functions Reorder request completion functions such that * All request completion functions are located together. * Functions which are used by only one caller is put right above the caller. * end_request() is put after other completion functions but before blk_update_request(). This change is for completion function cleanup which will follow. [ Impact: cleanup, code reorganization ] Signed-off-by: Tejun Heo --- include/linux/blkdev.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12e20de44b60..156ffd9de967 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -831,6 +831,14 @@ static inline void blk_run_address_space(struct address_space *mapping) extern void blkdev_dequeue_request(struct request *req); +/* + * blk_end_request() takes bytes instead of sectors as a complete size. + * blk_rq_bytes() returns bytes left to complete in the entire request. + * blk_rq_cur_bytes() returns bytes left to complete in the current segment. + */ +extern unsigned int blk_rq_bytes(struct request *rq); +extern unsigned int blk_rq_cur_bytes(struct request *rq); + /* * blk_end_request() and friends. * __blk_end_request() and end_request() must be called with @@ -857,14 +865,6 @@ extern void blk_abort_queue(struct request_queue *); extern void blk_update_request(struct request *rq, int error, unsigned int nr_bytes); -/* - * blk_end_request() takes bytes instead of sectors as a complete size. - * blk_rq_bytes() returns bytes left to complete in the entire request. - * blk_rq_cur_bytes() returns bytes left to complete in the current segment. - */ -extern unsigned int blk_rq_bytes(struct request *rq); -extern unsigned int blk_rq_cur_bytes(struct request *rq); - /* * Access functions for manipulating queue properties */ -- cgit v1.2.3 From 0b302d5aa7975006fa2ec3d66386610b9b36c669 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: kill blk_end_request_callback() With recent IDE updates, blk_end_request_callback() doesn't have any user now. Kill it. [ Impact: removal of unused convoluted interface ] Signed-off-by: Tejun Heo --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 156ffd9de967..1fa9dcf9aa6a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -855,9 +855,6 @@ extern int __blk_end_request(struct request *rq, int error, extern int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); extern void end_request(struct request *, int); -extern int blk_end_request_callback(struct request *rq, int error, - unsigned int nr_bytes, - int (drv_callback)(struct request *)); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); -- cgit v1.2.3 From 2e60e02297cf54e367567f2d85b2ca56b1c4a906 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: clean up request completion API Request completion has gone through several changes and became a bit messy over the time. Clean it up. 1. end_that_request_data() is a thin wrapper around end_that_request_data_first() which checks whether bio is NULL before doing anything and handles bidi completion. blk_update_request() is a thin wrapper around end_that_request_data() which clears nr_sectors on the last iteration but doesn't use the bidi completion. Clean it up by moving the initial bio NULL check and nr_sectors clearing on the last iteration into end_that_request_data() and renaming it to blk_update_request(), which makes blk_end_io() the only user of end_that_request_data(). Collapse end_that_request_data() into blk_end_io(). 2. There are four visible completion variants - blk_end_request(), __blk_end_request(), blk_end_bidi_request() and end_request(). blk_end_request() and blk_end_bidi_request() uses blk_end_request() as the backend but __blk_end_request() and end_request() use separate implementation in __blk_end_request() due to different locking rules. blk_end_bidi_request() is identical to blk_end_io(). Collapse blk_end_io() into blk_end_bidi_request(), separate out request update into internal helper blk_update_bidi_request() and add __blk_end_bidi_request(). Redefine [__]blk_end_request() as thin inline wrappers around [__]blk_end_bidi_request(). 3. As the whole request issue/completion usages are about to be modified and audited, it's a good chance to convert completion functions return bool which better indicates the intended meaning of return values. 4. The function name end_that_request_last() is from the days when it was a public interface and slighly confusing. Give it a proper internal name - blk_finish_request(). 5. Add description explaning that blk_end_bidi_request() can be safely used for uni requests as suggested by Boaz Harrosh. The only visible behavior change is from #1. nr_sectors counts are cleared after the final iteration no matter which function is used to complete the request. I couldn't find any place where the code assumes those nr_sectors counters contain the values for the last segment and this change is good as it makes the API much more consistent as the end result is now same whether a request is completed using [__]blk_end_request() alone or in combination with blk_update_request(). API further cleaned up per Christoph's suggestion. [ Impact: cleanup, rq->*nr_sectors always updated after req completion ] Signed-off-by: Tejun Heo Reviewed-by: Boaz Harrosh Cc: Christoph Hellwig --- include/linux/blkdev.h | 94 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 82 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1fa9dcf9aa6a..501f6845cc73 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -840,27 +840,97 @@ extern unsigned int blk_rq_bytes(struct request *rq); extern unsigned int blk_rq_cur_bytes(struct request *rq); /* - * blk_end_request() and friends. - * __blk_end_request() and end_request() must be called with - * the request queue spinlock acquired. + * Request completion related functions. + * + * blk_update_request() completes given number of bytes and updates + * the request without completing it. + * + * blk_end_request() and friends. __blk_end_request() and + * end_request() must be called with the request queue spinlock + * acquired. * * Several drivers define their own end_request and call * blk_end_request() for parts of the original function. * This prevents code duplication in drivers. */ -extern int blk_end_request(struct request *rq, int error, - unsigned int nr_bytes); -extern int __blk_end_request(struct request *rq, int error, - unsigned int nr_bytes); -extern int blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes); -extern void end_request(struct request *, int); +extern bool blk_update_request(struct request *rq, int error, + unsigned int nr_bytes); +extern bool blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, + unsigned int bidi_bytes); +extern bool __blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, + unsigned int bidi_bytes); + +/** + * blk_end_request - Helper function for drivers to complete the request. + * @rq: the request being processed + * @error: %0 for success, < %0 for error + * @nr_bytes: number of bytes to complete + * + * Description: + * Ends I/O on a number of bytes attached to @rq. + * If @rq has leftover, sets it up for the next range of segments. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + **/ +static inline bool blk_end_request(struct request *rq, int error, + unsigned int nr_bytes) +{ + return blk_end_bidi_request(rq, error, nr_bytes, 0); +} + +/** + * __blk_end_request - Helper function for drivers to complete the request. + * @rq: the request being processed + * @error: %0 for success, < %0 for error + * @nr_bytes: number of bytes to complete + * + * Description: + * Must be called with queue lock held unlike blk_end_request(). + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + **/ +static inline bool __blk_end_request(struct request *rq, int error, + unsigned int nr_bytes) +{ + return __blk_end_bidi_request(rq, error, nr_bytes, 0); +} + +/** + * end_request - end I/O on the current segment of the request + * @rq: the request being processed + * @uptodate: error value or %0/%1 uptodate flag + * + * Description: + * Ends I/O on the current segment of a request. If that is the only + * remaining segment, the request is also completed and freed. + * + * This is a remnant of how older block drivers handled I/O completions. + * Modern drivers typically end I/O on the full request in one go, unless + * they have a residual value to account for. For that case this function + * isn't really useful, unless the residual just happens to be the + * full current segment. In other words, don't use this function in new + * code. Use blk_end_request() or __blk_end_request() to end a request. + **/ +static inline void end_request(struct request *rq, int uptodate) +{ + int error = 0; + + if (uptodate <= 0) + error = uptodate ? uptodate : -EIO; + + __blk_end_bidi_request(rq, error, rq->hard_cur_sectors << 9, 0); +} + extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); extern void blk_abort_queue(struct request_queue *); -extern void blk_update_request(struct request *rq, int error, - unsigned int nr_bytes); /* * Access functions for manipulating queue properties -- cgit v1.2.3 From 40cbbb781d3eba5d6ac0860db078af490e5c7c6b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:19 +0900 Subject: block: implement and use [__]blk_end_request_all() There are many [__]blk_end_request() call sites which call it with full request length and expect full completion. Many of them ensure that the request actually completes by doing BUG_ON() the return value, which is awkward and error-prone. This patch adds [__]blk_end_request_all() which takes @rq and @error and fully completes the request. BUG_ON() is added to to ensure that this actually happens. Most conversions are simple but there are a few noteworthy ones. * cdrom/viocd: viocd_end_request() replaced with direct calls to __blk_end_request_all(). * s390/block/dasd: dasd_end_request() replaced with direct calls to __blk_end_request_all(). * s390/char/tape_block: tapeblock_end_request() replaced with direct calls to blk_end_request_all(). [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: Russell King Cc: Stephen Rothwell Cc: Mike Miller Cc: Martin Schwidefsky Cc: Jeff Garzik Cc: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Alex Dubov Cc: James Bottomley --- include/linux/blkdev.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 501f6845cc73..e33c8356b3da 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -882,6 +882,22 @@ static inline bool blk_end_request(struct request *rq, int error, return blk_end_bidi_request(rq, error, nr_bytes, 0); } +/** + * blk_end_request_all - Helper function for drives to finish the request. + * @rq: the request to finish + * @err: %0 for success, < %0 for error + * + * Description: + * Completely finish @rq. + */ +static inline void blk_end_request_all(struct request *rq, int error) +{ + bool pending; + + pending = blk_end_request(rq, error, blk_rq_bytes(rq)); + BUG_ON(pending); +} + /** * __blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed @@ -901,6 +917,22 @@ static inline bool __blk_end_request(struct request *rq, int error, return __blk_end_bidi_request(rq, error, nr_bytes, 0); } +/** + * __blk_end_request_all - Helper function for drives to finish the request. + * @rq: the request to finish + * @err: %0 for success, < %0 for error + * + * Description: + * Completely finish @rq. Must be called with queue lock held. + */ +static inline void __blk_end_request_all(struct request *rq, int error) +{ + bool pending; + + pending = __blk_end_request(rq, error, blk_rq_bytes(rq)); + BUG_ON(pending); +} + /** * end_request - end I/O on the current segment of the request * @rq: the request being processed -- cgit v1.2.3 From f06d9a2b52e246a66b606130cea3f0d7b7be17a7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:19 +0900 Subject: block: replace end_request() with [__]blk_end_request_cur() end_request() has been kept around for backward compatibility; however, it's about time for it to go away. * There aren't too many users left. * Its use of @updtodate is pretty confusing. * In some cases, newer code ends up using mixture of end_request() and [__]blk_end_request[_all](), which is way too confusing. So, add [__]blk_end_request_cur() and replace end_request() with it. Most conversions are straightforward. Noteworthy ones are... * paride/pcd: next_request() updated to take 0/-errno instead of 1/0. * paride/pf: pf_end_request() and next_request() updated to take 0/-errno instead of 1/0. * xd: xd_readwrite() updated to return 0/-errno instead of 1/0. * mtd/mtd_blkdevs: blktrans_discard_request() updated to return 0/-errno instead of 1/0. Unnecessary local variable res initialization removed from mtd_blktrans_thread(). [ Impact: cleanup ] Signed-off-by: Tejun Heo Acked-by: Joerg Dorchain Acked-by: Geert Uytterhoeven Acked-by: Grant Likely Acked-by: Laurent Vivier Cc: Tim Waugh Cc: Stephen Rothwell Cc: Paul Mackerras Cc: Jeremy Fitzhardinge Cc: Markus Lidel Cc: David Woodhouse Cc: Pete Zaitcev Cc: unsik Kim --- include/linux/blkdev.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e33c8356b3da..cfeb3c2feb27 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -845,9 +845,8 @@ extern unsigned int blk_rq_cur_bytes(struct request *rq); * blk_update_request() completes given number of bytes and updates * the request without completing it. * - * blk_end_request() and friends. __blk_end_request() and - * end_request() must be called with the request queue spinlock - * acquired. + * blk_end_request() and friends. __blk_end_request() must be called + * with the request queue spinlock acquired. * * Several drivers define their own end_request and call * blk_end_request() for parts of the original function. @@ -898,6 +897,19 @@ static inline void blk_end_request_all(struct request *rq, int error) BUG_ON(pending); } +/** + * blk_end_request_cur - Helper function to finish the current request chunk. + * @rq: the request to finish the current chunk for + * @err: %0 for success, < %0 for error + * + * Description: + * Complete the current consecutively mapped chunk from @rq. + */ +static inline void blk_end_request_cur(struct request *rq, int error) +{ + blk_end_request(rq, error, rq->hard_cur_sectors << 9); +} + /** * __blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed @@ -934,29 +946,17 @@ static inline void __blk_end_request_all(struct request *rq, int error) } /** - * end_request - end I/O on the current segment of the request - * @rq: the request being processed - * @uptodate: error value or %0/%1 uptodate flag + * __blk_end_request_cur - Helper function to finish the current request chunk. + * @rq: the request to finish the current chunk for + * @err: %0 for success, < %0 for error * * Description: - * Ends I/O on the current segment of a request. If that is the only - * remaining segment, the request is also completed and freed. - * - * This is a remnant of how older block drivers handled I/O completions. - * Modern drivers typically end I/O on the full request in one go, unless - * they have a residual value to account for. For that case this function - * isn't really useful, unless the residual just happens to be the - * full current segment. In other words, don't use this function in new - * code. Use blk_end_request() or __blk_end_request() to end a request. - **/ -static inline void end_request(struct request *rq, int uptodate) + * Complete the current consecutively mapped chunk from @rq. Must + * be called with queue lock held. + */ +static inline void __blk_end_request_cur(struct request *rq, int error) { - int error = 0; - - if (uptodate <= 0) - error = uptodate ? uptodate : -EIO; - - __blk_end_bidi_request(rq, error, rq->hard_cur_sectors << 9, 0); + __blk_end_request(rq, error, rq->hard_cur_sectors << 9); } extern void blk_complete_request(struct request *); -- cgit v1.2.3 From 731ec497e5888c6792ad62613ae9be97eebcd7ca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:20 +0900 Subject: block: kill rq->data Now that all block request data transfer is done via bio, rq->data isn't used. Kill it. While at it, make the roles of rq->special and buffer clear. [ Impact: drop now unncessary field from struct request ] Signed-off-by: Tejun Heo Cc: Boaz Harrosh --- include/linux/blkdev.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cfeb3c2feb27..12c545e2737c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -211,8 +211,8 @@ struct request { unsigned short ioprio; - void *special; - char *buffer; + void *special; /* opaque pointer available for LLD use */ + char *buffer; /* kaddr of the current segment if available */ int tag; int errors; @@ -229,7 +229,6 @@ struct request { unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; - void *data; void *sense; unsigned long deadline; -- cgit v1.2.3 From 9fd8d0e1bcb848257968d9a7d73ca4d890ea8bd1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:04 +0900 Subject: block: make blk_end_request_cur() return bool In the process of mindlessly copying [__]blk_end_request_all(), [__]blk_end_request_cur() ended up returning void even though they're partial completion functions. Fix it. [ Impact: fix braindead API ] Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12c545e2737c..3a5b1bd6582c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -903,10 +903,14 @@ static inline void blk_end_request_all(struct request *rq, int error) * * Description: * Complete the current consecutively mapped chunk from @rq. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request */ -static inline void blk_end_request_cur(struct request *rq, int error) +static inline bool blk_end_request_cur(struct request *rq, int error) { - blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return blk_end_request(rq, error, rq->hard_cur_sectors << 9); } /** @@ -952,10 +956,14 @@ static inline void __blk_end_request_all(struct request *rq, int error) * Description: * Complete the current consecutively mapped chunk from @rq. Must * be called with queue lock held. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request */ -static inline void __blk_end_request_cur(struct request *rq, int error) +static inline bool __blk_end_request_cur(struct request *rq, int error) { - __blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return __blk_end_request(rq, error, rq->hard_cur_sectors << 9); } extern void blk_complete_request(struct request *); -- cgit v1.2.3 From eec9462088a26c046d4db3100796a340a50890b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:14 +0900 Subject: mg_disk: fold mg_disk.h into mg_disk.c include/linux/mg_disk.h is used only by drivers/block/mg_disk.c. No reason to put it in a separate header. Fold it into mg_disk.c. [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: unsik Kim Signed-off-by: Jens Axboe --- include/linux/mg_disk.h | 206 ------------------------------------------------ 1 file changed, 206 deletions(-) delete mode 100644 include/linux/mg_disk.h (limited to 'include') diff --git a/include/linux/mg_disk.h b/include/linux/mg_disk.h deleted file mode 100644 index 1f76b1ebf627..000000000000 --- a/include/linux/mg_disk.h +++ /dev/null @@ -1,206 +0,0 @@ -/* - * include/linux/mg_disk.c - * - * Support for the mGine m[g]flash IO mode. - * Based on legacy hd.c - * - * (c) 2008 mGine Co.,LTD - * (c) 2008 unsik Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __MG_DISK_H__ -#define __MG_DISK_H__ - -#include -#include - -/* name for block device */ -#define MG_DISK_NAME "mgd" -/* name for platform device */ -#define MG_DEV_NAME "mg_disk" - -#define MG_DISK_MAJ 0 -#define MG_DISK_MAX_PART 16 -#define MG_SECTOR_SIZE 512 -#define MG_MAX_SECTS 256 - -/* Register offsets */ -#define MG_BUFF_OFFSET 0x8000 -#define MG_STORAGE_BUFFER_SIZE 0x200 -#define MG_REG_OFFSET 0xC000 -#define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ -#define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ -#define MG_REG_SECT_CNT (MG_REG_OFFSET + 4) -#define MG_REG_SECT_NUM (MG_REG_OFFSET + 6) -#define MG_REG_CYL_LOW (MG_REG_OFFSET + 8) -#define MG_REG_CYL_HIGH (MG_REG_OFFSET + 0xA) -#define MG_REG_DRV_HEAD (MG_REG_OFFSET + 0xC) -#define MG_REG_COMMAND (MG_REG_OFFSET + 0xE) /* write case */ -#define MG_REG_STATUS (MG_REG_OFFSET + 0xE) /* read case */ -#define MG_REG_DRV_CTRL (MG_REG_OFFSET + 0x10) -#define MG_REG_BURST_CTRL (MG_REG_OFFSET + 0x12) - -/* "Drive Select/Head Register" bit values */ -#define MG_REG_HEAD_MUST_BE_ON 0xA0 /* These 2 bits are always on */ -#define MG_REG_HEAD_DRIVE_MASTER (0x00 | MG_REG_HEAD_MUST_BE_ON) -#define MG_REG_HEAD_DRIVE_SLAVE (0x10 | MG_REG_HEAD_MUST_BE_ON) -#define MG_REG_HEAD_LBA_MODE (0x40 | MG_REG_HEAD_MUST_BE_ON) - - -/* "Device Control Register" bit values */ -#define MG_REG_CTRL_INTR_ENABLE 0x0 -#define MG_REG_CTRL_INTR_DISABLE (0x1<<1) -#define MG_REG_CTRL_RESET (0x1<<2) -#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH 0x0 -#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW (0x1<<4) -#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW 0x0 -#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH (0x1<<5) -#define MG_REG_CTRL_DPD_DISABLE 0x0 -#define MG_REG_CTRL_DPD_ENABLE (0x1<<6) - -/* Status register bit */ -/* error bit in status register */ -#define MG_REG_STATUS_BIT_ERROR 0x01 -/* corrected error in status register */ -#define MG_REG_STATUS_BIT_CORRECTED_ERROR 0x04 -/* data request bit in status register */ -#define MG_REG_STATUS_BIT_DATA_REQ 0x08 -/* DSC - Drive Seek Complete */ -#define MG_REG_STATUS_BIT_SEEK_DONE 0x10 -/* DWF - Drive Write Fault */ -#define MG_REG_STATUS_BIT_WRITE_FAULT 0x20 -#define MG_REG_STATUS_BIT_READY 0x40 -#define MG_REG_STATUS_BIT_BUSY 0x80 - -/* handy status */ -#define MG_STAT_READY (MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE) -#define MG_READY_OK(s) (((s) & (MG_STAT_READY | \ - (MG_REG_STATUS_BIT_BUSY | \ - MG_REG_STATUS_BIT_WRITE_FAULT | \ - MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY) - -/* Error register */ -#define MG_REG_ERR_AMNF 0x01 -#define MG_REG_ERR_ABRT 0x04 -#define MG_REG_ERR_IDNF 0x10 -#define MG_REG_ERR_UNC 0x40 -#define MG_REG_ERR_BBK 0x80 - -/* error code for others */ -#define MG_ERR_NONE 0 -#define MG_ERR_TIMEOUT 0x100 -#define MG_ERR_INIT_STAT 0x101 -#define MG_ERR_TRANSLATION 0x102 -#define MG_ERR_CTRL_RST 0x103 -#define MG_ERR_INV_STAT 0x104 -#define MG_ERR_RSTOUT 0x105 - -#define MG_MAX_ERRORS 6 /* Max read/write errors */ - -/* command */ -#define MG_CMD_RD 0x20 -#define MG_CMD_WR 0x30 -#define MG_CMD_SLEEP 0x99 -#define MG_CMD_WAKEUP 0xC3 -#define MG_CMD_ID 0xEC -#define MG_CMD_WR_CONF 0x3C -#define MG_CMD_RD_CONF 0x40 - -/* operation mode */ -#define MG_OP_CASCADE (1 << 0) -#define MG_OP_CASCADE_SYNC_RD (1 << 1) -#define MG_OP_CASCADE_SYNC_WR (1 << 2) -#define MG_OP_INTERLEAVE (1 << 3) - -/* synchronous */ -#define MG_BURST_LAT_4 (3 << 4) -#define MG_BURST_LAT_5 (4 << 4) -#define MG_BURST_LAT_6 (5 << 4) -#define MG_BURST_LAT_7 (6 << 4) -#define MG_BURST_LAT_8 (7 << 4) -#define MG_BURST_LEN_4 (1 << 1) -#define MG_BURST_LEN_8 (2 << 1) -#define MG_BURST_LEN_16 (3 << 1) -#define MG_BURST_LEN_32 (4 << 1) -#define MG_BURST_LEN_CONT (0 << 1) - -/* timeout value (unit: ms) */ -#define MG_TMAX_CONF_TO_CMD 1 -#define MG_TMAX_WAIT_RD_DRQ 10 -#define MG_TMAX_WAIT_WR_DRQ 500 -#define MG_TMAX_RST_TO_BUSY 10 -#define MG_TMAX_HDRST_TO_RDY 500 -#define MG_TMAX_SWRST_TO_RDY 500 -#define MG_TMAX_RSTOUT 3000 - -/* device attribution */ -/* use mflash as boot device */ -#define MG_BOOT_DEV (1 << 0) -/* use mflash as storage device */ -#define MG_STORAGE_DEV (1 << 1) -/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */ -#define MG_STORAGE_DEV_SKIP_RST (1 << 2) - -#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST) - -/* names of GPIO resource */ -#define MG_RST_PIN "mg_rst" -/* except MG_BOOT_DEV, reset-out pin should be assigned */ -#define MG_RSTOUT_PIN "mg_rstout" - -/* private driver data */ -struct mg_drv_data { - /* disk resource */ - u32 use_polling; - - /* device attribution */ - u32 dev_attr; - - /* internally used */ - struct mg_host *host; -}; - -/* main structure for mflash driver */ -struct mg_host { - struct device *dev; - - struct request_queue *breq; - spinlock_t lock; - struct gendisk *gd; - - struct timer_list timer; - void (*mg_do_intr) (struct mg_host *); - - u16 id[ATA_ID_WORDS]; - - u16 cyls; - u16 heads; - u16 sectors; - u32 n_sectors; - u32 nres_sectors; - - void __iomem *dev_base; - unsigned int irq; - unsigned int rst; - unsigned int rstout; - - u32 major; - u32 error; -}; - -/* - * Debugging macro and defines - */ -#undef DO_MG_DEBUG -#ifdef DO_MG_DEBUG -# define MG_DBG(fmt, args...) \ - printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args) -#else /* CONFIG_MG_DEBUG */ -# define MG_DBG(fmt, args...) do { } while (0) -#endif /* CONFIG_MG_DEBUG */ - -#endif -- cgit v1.2.3 From 9ec4fa271faf2db3b8e1419c998da1ca6b094eb6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 17:57:18 -0700 Subject: irq, cpumask: correct CPUMASKS_OFFSTACK typo and fix fallout CPUMASKS_OFFSTACK is not defined anywhere (it is CPUMASK_OFFSTACK). It is a typo and init_allocate_desc_masks() is called before it set affinity to all cpus... Split init_alloc_desc_masks() into all_desc_masks() and init_desc_masks(). Also use CPUMASK_OFFSTACK in alloc_desc_masks(). [ Impact: fix smp_affinity copying/setup when moving irq_desc between CPUs ] Signed-off-by: Yinghai Lu Acked-by: Rusty Russell Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" LKML-Reference: <49F6546E.3040406@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/irq.h | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index b7cbeed972e4..c4953cf27e5e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -424,27 +424,25 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #ifdef CONFIG_SMP /** - * init_alloc_desc_masks - allocate cpumasks for irq_desc + * alloc_desc_masks - allocate cpumasks for irq_desc * @desc: pointer to irq_desc struct * @cpu: cpu which will be handling the cpumasks * @boot: true if need bootmem * * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). - * Side effect: affinity has all bits set, pending_mask has all bits clear. */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { +#ifdef CONFIG_CPUMASK_OFFSTACK int node; if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); - cpumask_setall(desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ alloc_bootmem_cpumask_var(&desc->pending_mask); - cpumask_clear(desc->pending_mask); #endif return true; } @@ -453,18 +451,25 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; - cpumask_setall(desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { free_cpumask_var(desc->affinity); return false; } - cpumask_clear(desc->pending_mask); +#endif #endif return true; } +static inline void init_desc_masks(struct irq_desc *desc) +{ + cpumask_setall(desc->affinity); +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_clear(desc->pending_mask); +#endif +} + /** * init_copy_desc_masks - copy cpumasks for irq_desc * @old_desc: pointer to old irq_desc struct @@ -478,7 +483,7 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, static inline void init_copy_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { -#ifdef CONFIG_CPUMASKS_OFFSTACK +#ifdef CONFIG_CPUMASK_OFFSTACK cpumask_copy(new_desc->affinity, old_desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ @@ -499,12 +504,16 @@ static inline void free_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { return true; } +static inline void init_desc_masks(struct irq_desc *desc) +{ +} + static inline void init_copy_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { -- cgit v1.2.3 From fcef5911c7ea89b80d5bfc727f402f37c9eefd57 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 17:58:23 -0700 Subject: x86/irq: remove leftover code from NUMA_MIGRATE_IRQ_DESC The original feature of migrating irq_desc dynamic was too fragile and was causing problems: it caused crashes on systems with lots of cards with MSI-X when user-space irq-balancer was enabled. We now have new patches that create irq_desc according to device numa node. This patch removes the leftover bits of the dynamic balancer. [ Impact: remove dead code ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49F654AF.8000808@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/irq.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index c4953cf27e5e..2a34cd6281d7 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -212,16 +212,6 @@ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); -static inline struct irq_desc * -irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) -{ -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - return irq_to_desc(irq); -#else - return desc; -#endif -} - /* * Migration helpers for obsolete names, they will go away: */ -- cgit v1.2.3 From d5dedd4507d307eb3f35f21b6e16f336fdc0d82a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 17:59:21 -0700 Subject: irq: change ->set_affinity() to return status according to Ingo, change set_affinity() in irq_chip should return int, because that way we can handle failure cases in a much cleaner way, in the genirq layer. v2: fix two typos [ Impact: extend API ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell Cc: linux-arch@vger.kernel.org LKML-Reference: <49F654E9.4070809@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 2a34cd6281d7..8e4c18b29157 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -117,7 +117,7 @@ struct irq_chip { void (*eoi)(unsigned int irq); void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, + int (*set_affinity)(unsigned int irq, const struct cpumask *dest); int (*retrigger)(unsigned int irq); int (*set_type)(unsigned int irq, unsigned int flow_type); -- cgit v1.2.3 From 85ac16d033370caf6f48d743c8dc8103700f5cc5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:00:38 -0700 Subject: x86/irq: change irq_desc_alloc() to take node instead of cpu This simplifies the node awareness of the code. All our allocators only deal with a NUMA node ID locality not with CPU ids anyway - so there's no need to maintain (and transform) a CPU id all across the IRq layer. v2: keep move_irq_desc related [ Impact: cleanup, prepare IRQ code to be NUMA-aware ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell Cc: Jeremy Fitzhardinge LKML-Reference: <49F65536.2020300@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 2 +- include/linux/irq.h | 16 ++++++---------- 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 91bb76f44f14..ff374ceface0 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -566,6 +566,6 @@ struct irq_desc; extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -extern int arch_init_chip_data(struct irq_desc *desc, int cpu); +extern int arch_init_chip_data(struct irq_desc *desc, int node); #endif diff --git a/include/linux/irq.h b/include/linux/irq.h index 8e4c18b29157..a09baf8f9d99 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -187,7 +187,7 @@ struct irq_desc { spinlock_t lock; #ifdef CONFIG_SMP cpumask_var_t affinity; - unsigned int cpu; + unsigned int node; #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_var_t pending_mask; #endif @@ -201,16 +201,16 @@ struct irq_desc { } ____cacheline_internodealigned_in_smp; extern void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu); + struct irq_desc *desc, int node); extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; #else /* CONFIG_SPARSE_IRQ */ -extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); +extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); #endif /* CONFIG_SPARSE_IRQ */ -extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); +extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); /* * Migration helpers for obsolete names, they will go away: @@ -422,12 +422,10 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). */ -static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { #ifdef CONFIG_CPUMASK_OFFSTACK - int node; - if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); @@ -437,8 +435,6 @@ static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, return true; } - node = cpu_to_node(cpu); - if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; @@ -494,7 +490,7 @@ static inline void free_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { return true; -- cgit v1.2.3 From a2f809b08ae4dddc1015c7dcd8659e5729e45b3e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:01:20 -0700 Subject: irq: change ACPI GSI APIs to also take a device argument We want to use dev_to_node() later on, to be aware of the 'home node' of the GSI in question. [ Impact: cleanup, prepare the IRQ code to be more NUMA aware ] Signed-off-by: Yinghai Lu Acked-by: Len Brown Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell Cc: Len Brown Cc: Bjorn Helgaas Cc: Tony Luck Cc: linux-acpi@vger.kernel.org Cc: linux-ia64@vger.kernel.org LKML-Reference: <49F65560.20904@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 88be890ee3c7..51b4b0a5ce8c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -119,7 +119,7 @@ extern int pci_mmcfg_config_num; extern int sbf_port; extern unsigned long acpi_realmode_flags; -int acpi_register_gsi (u32 gsi, int triggering, int polarity); +int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); #ifdef CONFIG_X86_IO_APIC -- cgit v1.2.3 From d047f53a2ecce37e3bdf79eac5a326fbaadb3628 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:02:23 -0700 Subject: x86/irq: change MSI irq_desc to be more numa aware Try to get irq_desc on the home node in create_irq_nr(). v2: don't check if we can move it when sparse_irq is not used v3: use move_irq_des, if that node is not what we want [ Impact: optimization, make MSI IRQ descriptors more NUMA aware ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49F6559F.7070005@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index a09baf8f9d99..4b95ddb5304b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -376,7 +376,7 @@ extern void set_irq_noprobe(unsigned int irq); extern void set_irq_probe(unsigned int irq); /* Handle dynamic irq creation and destruction */ -extern unsigned int create_irq_nr(unsigned int irq_want); +extern unsigned int create_irq_nr(unsigned int irq_want, int node); extern int create_irq(void); extern void destroy_irq(unsigned int irq); -- cgit v1.2.3 From 0f9a623dd6c9b5b4dd00c232f29525bfc7a8ecf2 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Tue, 28 Apr 2009 20:17:51 +0100 Subject: tracing: x86, mmiotrace: only register for die notifier when tracer active Follow up to afcfe024aebd74b0984a41af9a34e009cf5badaf in Linus' tree ("x86: mmiotrace: quieten spurious warning message") Signed-off-by: Stuart Bennett Acked-by: Pekka Paalanen Cc: Steven Rostedt LKML-Reference: <1240946271-7083-5-git-send-email-stuart@freedesktop.org> Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 3d1b7bde1283..97491f78b08c 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -30,6 +30,8 @@ extern unsigned int kmmio_count; extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); +extern int kmmio_init(void); +extern void kmmio_cleanup(void); #ifdef CONFIG_MMIOTRACE /* kmmio is active by some kmmio_probes? */ -- cgit v1.2.3 From 30e673b230f9d556eb81ef68a7b1a08c8b3b142c Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 28 Apr 2009 03:04:47 -0500 Subject: tracing/filters: move preds into event_filter object Create a new event_filter object, and move the pred-related members out of the call and subsystem objects and into the filter object - the details of the filter implementation don't need to be exposed in the call and subsystem in any case, and it will also help make the new parser implementation a little cleaner. [ Impact: refactor trace-filter code to prepare for new features ] Signed-off-by: Tom Zanussi Acked-by: Steven Rostedt Cc: fweisbec@gmail.com Cc: Li Zefan LKML-Reference: <1240905887.6416.119.camel@tropicana> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 78a9ba24cbf6..46a27f2695a6 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -101,8 +101,8 @@ struct ftrace_event_call { int (*show_format)(struct trace_seq *s); int (*define_fields)(void); struct list_head fields; - int n_preds; - struct filter_pred **preds; + int filter_active; + void *filter; void *mod; #ifdef CONFIG_EVENT_PROFILE -- cgit v1.2.3 From a118e4d1402f1349fe3d953493e4168a300a752d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 28 Apr 2009 03:04:53 -0500 Subject: tracing/filters: distinguish between signed and unsigned fields The new filter comparison ops need to be able to distinguish between signed and unsigned field types, so add an is_signed flag/param to the event field struct/trace_define_fields(). Also define a simple macro, is_signed_type() to determine the signedness at compile time, used in the trace macros. If the is_signed_type() macro won't work with a specific type, a new slightly modified version of TRACE_FIELD() called TRACE_FIELD_SIGN(), allows the signedness to be set explicitly. [ Impact: extend trace-filter code for new feature ] Signed-off-by: Tom Zanussi Acked-by: Steven Rostedt Cc: fweisbec@gmail.com Cc: Li Zefan LKML-Reference: <1240905893.6416.120.camel@tropicana> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 7 ++++--- include/trace/ftrace.h | 16 ++++++++-------- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 46a27f2695a6..e61a7403f3d0 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -122,8 +122,9 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, struct ring_buffer_event *event); extern int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size); + char *name, int offset, int size, int is_signed); +#define is_signed_type(type) (((type)(-1)) < 0) /* * The double __builtin_constant_p is because gcc will give us an error @@ -144,10 +145,10 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -#define __common_field(type, item) \ +#define __common_field(type, item, is_signed) \ ret = trace_define_field(event_call, #type, "common_" #item, \ offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item)); \ + sizeof(field.ent.item), is_signed); \ if (ret) \ return ret; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1e681142f1da..edb02bc9f8ff 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -225,7 +225,7 @@ ftrace_format_##call(struct trace_seq *s) \ #define __field(type, item) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), is_signed_type(type)); \ if (ret) \ return ret; @@ -234,7 +234,7 @@ ftrace_format_##call(struct trace_seq *s) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), 0); \ if (ret) \ return ret; @@ -242,7 +242,7 @@ ftrace_format_##call(struct trace_seq *s) \ #define __string(item, src) \ ret = trace_define_field(event_call, "__str_loc", #item, \ offsetof(typeof(field), __str_loc_##item), \ - sizeof(field.__str_loc_##item)); + sizeof(field.__str_loc_##item), 0); #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ @@ -253,11 +253,11 @@ ftrace_define_fields_##call(void) \ struct ftrace_event_call *event_call = &event_##call; \ int ret; \ \ - __common_field(int, type); \ - __common_field(unsigned char, flags); \ - __common_field(unsigned char, preempt_count); \ - __common_field(int, pid); \ - __common_field(int, tgid); \ + __common_field(int, type, 1); \ + __common_field(unsigned char, flags, 0); \ + __common_field(unsigned char, preempt_count, 0); \ + __common_field(int, pid, 1); \ + __common_field(int, tgid, 1); \ \ tstruct; \ \ -- cgit v1.2.3 From 8b3725621074040d380664964ffbc40610aef8c6 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 28 Apr 2009 03:04:59 -0500 Subject: tracing/filters: a better event parser Replace the current event parser hack with a better one. Filters are no longer specified predicate by predicate, but all at once and can use parens and any of the following operators: numeric fields: ==, !=, <, <=, >, >= string fields: ==, != predicates can be combined with the logical operators: &&, || examples: "common_preempt_count > 4" > filter "((sig >= 10 && sig < 15) || sig == 17) && comm != bash" > filter If there was an error, the erroneous string along with an error message can be seen by looking at the filter e.g.: ((sig >= 10 && sig < 15) || dsig == 17) && comm != bash ^ parse_error: Field not found Currently the caret for an error always appears at the beginning of the filter; a real position should be used, but the error message should be useful even without it. To clear a filter, '0' can be written to the filter file. Filters can also be set or cleared for a complete subsystem by writing the same filter as would be written to an individual event to the filter file at the root of the subsytem. Note however, that if any event in the subsystem lacks a field specified in the filter being set, the set will fail and all filters in the subsytem are automatically cleared. This change from the previous version was made because using only the fields that happen to exist for a given event would most likely result in a meaningless filter. Because the logical operators are now implemented as predicates, the maximum number of predicates in a filter was increased from 8 to 16. [ Impact: add new, extended trace-filter implementation ] Signed-off-by: Tom Zanussi Acked-by: Steven Rostedt Cc: fweisbec@gmail.com Cc: Li Zefan LKML-Reference: <1240905899.6416.121.camel@tropicana> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index e61a7403f3d0..5fff40c9ff59 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -112,7 +112,7 @@ struct ftrace_event_call { #endif }; -#define MAX_FILTER_PRED 8 +#define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 128 extern int init_preds(struct ftrace_event_call *call); -- cgit v1.2.3 From 829b42dd395c5801f6ae87da87ecbdcfd5ef1a6c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:46:59 +0200 Subject: perf_counter, x86: declare perf_max_counters only for CONFIG_PERF_COUNTERS This is only needed for CONFIG_PERF_COUNTERS enabled. [ Impact: cleanup ] Signed-off-by: Robert Richter Acked-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1241002046-8832-3-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 981432885301..be10b3ffe320 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -512,12 +512,13 @@ struct perf_cpu_context { int recursion[4]; }; +#ifdef CONFIG_PERF_COUNTERS + /* * Set by architecture code: */ extern int perf_max_counters; -#ifdef CONFIG_PERF_COUNTERS extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); -- cgit v1.2.3 From 4aeb0b4239bb3b67ed402cb9cef3e000c892cadf Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:03 +0200 Subject: perfcounters: rename struct hw_perf_counter_ops into struct pmu This patch renames struct hw_perf_counter_ops into struct pmu. It introduces a structure to describe a cpu specific pmu (performance monitoring unit). It may contain ops and data. The new name of the structure fits better, is shorter, and thus better to handle. Where it was appropriate, names of function and variable have been changed too. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-7-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index be10b3ffe320..c3db52dc876a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -334,9 +334,9 @@ struct hw_perf_counter { struct perf_counter; /** - * struct hw_perf_counter_ops - performance counter hw ops + * struct pmu - generic performance monitoring unit */ -struct hw_perf_counter_ops { +struct pmu { int (*enable) (struct perf_counter *counter); void (*disable) (struct perf_counter *counter); void (*read) (struct perf_counter *counter); @@ -381,7 +381,7 @@ struct perf_counter { struct list_head sibling_list; int nr_siblings; struct perf_counter *group_leader; - const struct hw_perf_counter_ops *hw_ops; + const struct pmu *pmu; enum perf_counter_active_state state; enum perf_counter_active_state prev_state; @@ -519,8 +519,7 @@ struct perf_cpu_context { */ extern int perf_max_counters; -extern const struct hw_perf_counter_ops * -hw_perf_counter_init(struct perf_counter *counter); +extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); -- cgit v1.2.3 From 6f00cada07bb5da7f751929d3173494dcc5446cc Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:17 +0200 Subject: perf_counter, x86: consistent use of type int for counter index The type of counter index is sometimes implemented as unsigned int. This patch changes this to have a consistent usage of int. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-21-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c3db52dc876a..41aed4270057 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -318,7 +318,7 @@ struct hw_perf_counter { unsigned long config_base; unsigned long counter_base; int nmi; - unsigned int idx; + int idx; }; union { /* software */ atomic64_t count; -- cgit v1.2.3 From b1fca26631f76a5e8b18435a43f5d82b8734da4b Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 23 Mar 2009 18:22:09 +0100 Subject: mutex: add atomic_dec_and_mutex_lock() Much like the atomic_dec_and_lock() function in which we take an hold a spin_lock if we drop the atomic to 0 this function takes and holds the mutex if we dec the atomic to 0. Signed-off-by: Eric Paris Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.410913479@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3069ec7e0ab8..93054fc3635c 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -151,4 +151,27 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} + #endif -- cgit v1.2.3 From 3bcac0263f0b45e67a64034ebcb69eb9abb742f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Apr 2009 13:45:05 +0100 Subject: SELinux: Don't flush inherited SIGKILL during execve() Don't flush inherited SIGKILL during execve() in SELinux's post cred commit hook. This isn't really a security problem: if the SIGKILL came before the credentials were changed, then we were right to receive it at the time, and should honour it; if it came after the creds were changed, then we definitely should honour it; and in any case, all that will happen is that the process will be scrapped before it ever returns to userspace. Signed-off-by: David Howells Signed-off-by: Oleg Nesterov Signed-off-by: James Morris --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1d19c025f9d2..d3b787c7aef3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1875,6 +1875,7 @@ extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); +extern void __flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); -- cgit v1.2.3 From a511e3f968c462a55ef58697257f5347c73d306e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 29 Apr 2009 15:59:58 -0700 Subject: mutex: add atomic_dec_and_mutex_lock(), fix include/linux/mutex.h:136: warning: 'mutex_lock' declared inline after being called include/linux/mutex.h:136: warning: previous declaration of 'mutex_lock' was here uninline it. [ Impact: clean up and uninline, address compiler warning ] Signed-off-by: Andrew Morton Cc: Al Viro Cc: Christoph Hellwig Cc: Eric Paris Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <200904292318.n3TNIsi6028340@imap1.linux-foundation.org> Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 93054fc3635c..878cab4f5fcc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -150,28 +150,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); - -/** - * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 - * @cnt: the atomic which we are to dec - * @lock: the mutex to return holding if we dec to 0 - * - * return true and hold lock if we dec to 0, return false otherwise - */ -static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) -{ - /* dec if we can't possibly hit 0 */ - if (atomic_add_unless(cnt, -1, 1)) - return 0; - /* we might hit 0, so take the lock */ - mutex_lock(lock); - if (!atomic_dec_and_test(cnt)) { - /* when we actually did the dec, we didn't hit 0 */ - mutex_unlock(lock); - return 0; - } - /* we hit 0, and we hold the lock */ - return 1; -} +extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); #endif -- cgit v1.2.3 From ba9c22f2c01cf5c88beed5a6b9e07d42e10bd358 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Mon, 20 Apr 2009 22:22:22 -0700 Subject: futex: remove FUTEX_REQUEUE_PI (non CMP) The new requeue PI futex op codes were modeled after the existing FUTEX_REQUEUE and FUTEX_CMP_REQUEUE calls. I was unaware at the time that FUTEX_REQUEUE was only around for compatibility reasons and shouldn't be used in new code. Ulrich Drepper elaborates on this in his Futexes are Tricky paper: http://people.redhat.com/drepper/futex.pdf. The deprecated call doesn't catch changes to the futex corresponding to the destination futex which can lead to deadlock. Therefor, I feel it best to remove FUTEX_REQUEUE_PI and leave only FUTEX_CMP_REQUEUE_PI as there are not yet any existing users of the API. This patch does change the OP code value of FUTEX_CMP_REQUEUE_PI to 12 from 13. Since my test case is the only known user of this API, I felt this was the right thing to do, rather than leave a hole in the enumeration. I chose to continue using the _CMP_ modifier in the OP code to make it explicit to the user that the test is being done. Builds, boots, and ran several hundred iterations requeue_pi.c. Signed-off-by: Darren Hart LKML-Reference: <49ED580E.1050502@us.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/futex.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index b05519ca9e57..34956c8fdebf 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -24,8 +24,7 @@ union ktime; #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 #define FUTEX_WAIT_REQUEUE_PI 11 -#define FUTEX_REQUEUE_PI 12 -#define FUTEX_CMP_REQUEUE_PI 13 +#define FUTEX_CMP_REQUEUE_PI 12 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -43,7 +42,6 @@ union ktime; #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) -#define FUTEX_REQUEUE_PI_PRIVATE (FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) -- cgit v1.2.3 From 62ab4505e3efaf67784f84059e0fb9cedb1728ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 4 Apr 2009 21:01:06 +0000 Subject: signals: implement sys_rt_tgsigqueueinfo sys_kill has the per thread counterpart sys_tgkill. sigqueueinfo is missing a thread directed counterpart. Such an interface is important for migrating applications from other OSes which have the per thread delivery implemented. Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Acked-by: Ulrich Drepper --- include/linux/compat.h | 2 ++ include/linux/signal.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index f2ded21f9a3c..af931ee43dd8 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); +long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo); static inline int compat_timeval_compare(struct compat_timeval *lhs, struct compat_timeval *rhs) diff --git a/include/linux/signal.h b/include/linux/signal.h index 84f997f8aa53..c7552836bd95 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig) extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); +extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, + siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); extern int show_unhandled_signals; -- cgit v1.2.3 From c33a0bc4e41ef169d6e807d8abb9502544b518e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 1 May 2009 12:23:16 +0200 Subject: perf_counter: fix race in perf_output_* When two (or more) contexts output to the same buffer, it is possible to observe half written output. Suppose we have CPU0 doing perf_counter_mmap(), CPU1 doing perf_counter_overflow(). If CPU1 does a wakeup and exposes head to user-space, then CPU2 can observe the data CPU0 is still writing. [ Impact: fix occasionally corrupted profiling records ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090501102533.007821627@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 41aed4270057..f776851f8c4b 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -358,10 +358,13 @@ struct perf_mmap_data { struct rcu_head rcu_head; int nr_pages; /* nr of data pages */ - atomic_t wakeup; /* POLL_ for wakeups */ + atomic_t poll; /* POLL_ for wakeups */ atomic_t head; /* write position */ atomic_t events; /* event limit */ + atomic_t wakeup_head; /* completed head */ + atomic_t lock; /* concurrent writes */ + struct perf_counter_mmap_page *user_page; void *data_pages[0]; }; -- cgit v1.2.3 From 9ee1983c9aa18f12388ef660d0c76a23dc112959 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 30 Apr 2009 13:29:47 -0400 Subject: tracing: add irq tracepoint documentation Document irqs for the newly created docbook. [ Impact: add documentation ] Signed-off-by: Jason Baron Acked-by: Randy Dunlap Cc: akpm@linux-foundation.org Cc: rostedt@goodmis.org Cc: fweisbec@gmail.com Cc: mathieu.desnoyers@polymtl.ca Cc: wcohen@redhat.com LKML-Reference: <73ff42be3420157667ec548e9b0e409c3cfad05f.1241107197.git.jbaron@redhat.com> Signed-off-by: Ingo Molnar --- include/trace/events/irq.h | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 768686467518..32a9f7ef432b 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -7,8 +7,16 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM irq -/* - * Tracepoint for entry of interrupt handler: +/** + * irq_handler_entry - called immediately before the irq action handler + * @irq: irq number + * @action: pointer to struct irqaction + * + * The struct irqaction pointed to by @action contains various + * information about the handler, including the device name, + * @action->name, and the device id, @action->dev_id. When used in + * conjunction with the irq_handler_exit tracepoint, we can figure + * out irq handler latencies. */ TRACE_EVENT(irq_handler_entry, @@ -29,8 +37,16 @@ TRACE_EVENT(irq_handler_entry, TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) ); -/* - * Tracepoint for return of an interrupt handler: +/** + * irq_handler_exit - called immediately after the irq action handler returns + * @irq: irq number + * @action: pointer to struct irqaction + * @ret: return value + * + * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding + * @action->handler scuccessully handled this irq. Otherwise, the irq might be + * a shared irq line, or the irq was not handled successfully. Can be used in + * conjunction with the irq_handler_entry to understand irq handler latencies. */ TRACE_EVENT(irq_handler_exit, @@ -52,6 +68,17 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? "handled" : "unhandled") ); +/** + * softirq_entry - called immediately before the softirq handler + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter, contains a pointer to the struct softirq_action + * which has a pointer to the action handler that is called. By subtracting + * the @vec pointer from the @h pointer, we can determine the softirq + * number. Also, when used in combination with the softirq_exit tracepoint + * we can determine the softirq latency. + */ TRACE_EVENT(softirq_entry, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), @@ -71,6 +98,17 @@ TRACE_EVENT(softirq_entry, TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) ); +/** + * softirq_exit - called immediately after the softirq handler returns + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter contains a pointer to the struct softirq_action + * that has handled the softirq. By subtracting the @vec pointer from + * the @h pointer, we can determine the softirq number. Also, when used in + * combination with the softirq_entry tracepoint we can determine the softirq + * latency. + */ TRACE_EVENT(softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), -- cgit v1.2.3 From 15e957d08dd4a841359cfec59ecb74041e0097aa Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 30 Apr 2009 01:17:50 -0700 Subject: x86/irq: use move_irq_desc() in create_irq_nr() move_irq_desc() will try to move irq_desc to the home node if the allocated one is not correct, in create_irq_nr(). ( This can happen on devices that are on different nodes that are using MSI, when drivers are loaded and unloaded randomly. ) v2: fix non-smp build v3: add NUMA_IRQ_DESC to eliminate #ifdefs [ Impact: improve irq descriptor locality on NUMA systems ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49F95EAE.2050903@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/irq.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 4b95ddb5304b..eedbb8e5e0cc 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -206,9 +206,16 @@ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; -#else /* CONFIG_SPARSE_IRQ */ +#endif + +#ifdef CONFIG_NUMA_IRQ_DESC extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); -#endif /* CONFIG_SPARSE_IRQ */ +#else +static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) +{ + return desc; +} +#endif extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); -- cgit v1.2.3 From a25cbd045a2ffc42787d4dbcbb9c7118f5f42732 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 1 May 2009 14:45:46 +0900 Subject: clocksource: setup mult_orig in clocksource_enable() Setup clocksource mult_orig in clocksource_enable(). Clocksource drivers can save power by using keeping the device clock disabled while the clocksource is unused. In practice this means that the enable() and disable() callbacks perform clk_enable() and clk_disable(). The enable() callback may also use clk_get_rate() to get the clock rate from the clock framework. This information can then be used to calculate the shift and mult variables. Currently the mult_orig variable is setup from mult at registration time only. This is conflicting with the above case since the clock is disabled and the mult variable is not yet calculated at the time of registration. Moving the mult_orig setup code to clocksource_enable() allows us to both handle the common case with no enable() callback and the mult-changed-after-enable() case. [ Impact: allow dynamic clock source usage ] Signed-off-by: Magnus Damm LKML-Reference: <20090501054546.8193.10688.sendpatchset@rx1.opensource.se> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 5a40d14daa9f..c56457c8334e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -288,7 +288,15 @@ static inline cycle_t clocksource_read(struct clocksource *cs) */ static inline int clocksource_enable(struct clocksource *cs) { - return cs->enable ? cs->enable(cs) : 0; + int ret = 0; + + if (cs->enable) + ret = cs->enable(cs); + + /* save mult_orig on enable */ + cs->mult_orig = cs->mult; + + return ret; } /** -- cgit v1.2.3 From 7d27558c4138ac6b3684dea35c2f4379b940a7dd Mon Sep 17 00:00:00 2001 From: john stultz Date: Fri, 1 May 2009 13:10:26 -0700 Subject: timekeeping: create arch_gettimeoffset infrastructure Some arches don't supply their own clocksource. This is mainly the case in architectures that get their inter-tick times by reading the counter on their interval timer. Since these timers wrap every tick, they're not really useful as clocksources. Wrapping them to act like one is possible but not very efficient. So we provide a callout these arches can implement for use with the jiffies clocksource to provide finer then tick granular time. [ Impact: ease the migration to generic time keeping ] Signed-off-by: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/time.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index 242f62499bb7..ea16c1a01d51 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -113,6 +113,21 @@ struct timespec current_kernel_time(void); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) +/* Some architectures do not supply their own clocksource. + * This is mainly the case in architectures that get their + * inter-tick times by reading the counter on their interval + * timer. Since these timers wrap every tick, they're not really + * useful as clocksources. Wrapping them to act like one is possible + * but not very efficient. So we provide a callout these arches + * can implement for use with the jiffies clocksource to provide + * finer then tick granular time. + */ +#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET +extern u32 arch_gettimeoffset(void); +#else +static inline u32 arch_gettimeoffset(void) { return 0; } +#endif + extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); -- cgit v1.2.3 From 33f503c96c976fd585dedb76514ca6cb286e60d9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 2 May 2009 12:24:55 +0100 Subject: ASoC: Use a shared define for AC97 CODEC data formats The AC97 wire format is completely fixed so CODECs don't have any choice about the formats they accept but controllers accept a variety of data formats and render them down onto the bus. Have a shared define so all the CODEC drivers will interoperate with any of our controller drivers. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 22b729fbbf84..ea07b4bd5161 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -96,6 +96,9 @@ struct snd_pcm_substream; #define SND_SOC_CLOCK_IN 0 #define SND_SOC_CLOCK_OUT 1 +#define SND_SOC_STD_AC97_FMTS (SNDRV_PCM_FMTBIT_S16_LE |\ + SNDRV_PCM_FMTBIT_S32_LE) + struct snd_soc_dai_ops; struct snd_soc_dai; struct snd_ac97_bus_ops; -- cgit v1.2.3 From 4072604b9dd18f25a98cc0f4d3d4553ed1ad4152 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 2 May 2009 12:28:25 +0100 Subject: ASoC: Remove unused DAI format defines The defines for TDM and synchronous clocks are not used - they are mostly a legacy of the automatic clocking configuration. TDM will require configuration of the number of timeslots and which ones to use so can't be fit into the DAI format and synchronous mode is handled by symmetric_rates (and needs to be done by constraints rather than when the DAI format is being configured). Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index ea07b4bd5161..a997c2cac63f 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -44,24 +44,6 @@ struct snd_pcm_substream; #define SND_SOC_DAIFMT_CONT (0 << 4) /* continuous clock */ #define SND_SOC_DAIFMT_GATED (1 << 4) /* clock is gated */ -/* - * DAI Left/Right Clocks. - * - * Specifies whether the DAI can support different samples for similtanious - * playback and capture. This usually requires a seperate physical frame - * clock for playback and capture. - */ -#define SND_SOC_DAIFMT_SYNC (0 << 5) /* Tx FRM = Rx FRM */ -#define SND_SOC_DAIFMT_ASYNC (1 << 5) /* Tx FRM ~ Rx FRM */ - -/* - * TDM - * - * Time Division Multiplexing. Allows PCM data to be multplexed with other - * data on the DAI. - */ -#define SND_SOC_DAIFMT_TDM (1 << 6) - /* * DAI hardware signal inversions. * -- cgit v1.2.3 From d5ed4c2e5ce9f5f6fd6a5a39ee1196a1f8a46eed Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 30 Apr 2009 07:02:49 +0000 Subject: clocksource: SuperH MTU2 Timer driver This patch adds a MTU2 driver for the SuperH architecture. The MTU2 driver is a platform driver with early platform support to allow using a MTU2 channel as only clockevent during system bootup. Clocksource on sh2a is currently unsupported due to code generation issues with 64-bit math, so at this point only periodic clockevent support is in place. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- include/linux/sh_mtu2.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/linux/sh_mtu2.h (limited to 'include') diff --git a/include/linux/sh_mtu2.h b/include/linux/sh_mtu2.h new file mode 100644 index 000000000000..a98876340ff4 --- /dev/null +++ b/include/linux/sh_mtu2.h @@ -0,0 +1,12 @@ +#ifndef __SH_MTU2_H__ +#define __SH_MTU2_H__ + +struct sh_mtu2_config { + char *name; + int channel_offset; + int timer_bit; + char *clk; + unsigned long clockevent_rating; +}; + +#endif /* __SH_MTU2_H__ */ -- cgit v1.2.3 From 9570ef20423b549757aa484ad388f9a7d5bdc4d9 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 1 May 2009 06:51:00 +0000 Subject: clocksource: SuperH TMU Timer driver This patch adds a TMU driver for the SuperH architecture. The TMU driver is a platform driver with early platform support to allow using a TMU channel as clockevent or clocksource during system bootup or later. Clocksource or clockevent can be selected. Both periodic and oneshot clockevents are supported. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- include/linux/sh_tmu.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/sh_tmu.h (limited to 'include') diff --git a/include/linux/sh_tmu.h b/include/linux/sh_tmu.h new file mode 100644 index 000000000000..b1195e8a37d0 --- /dev/null +++ b/include/linux/sh_tmu.h @@ -0,0 +1,13 @@ +#ifndef __SH_TMU_H__ +#define __SH_TMU_H__ + +struct sh_tmu_config { + char *name; + unsigned long channel_offset; + int timer_bit; + char *clk; + unsigned long clockevent_rating; + unsigned long clocksource_rating; +}; + +#endif /* __SH_TMU_H__ */ -- cgit v1.2.3 From 46a12f7426d71cabc08972cf8d3ffdd441d26a3a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Sun, 3 May 2009 17:57:17 +0900 Subject: sh: Consolidate MTU2/CMT/TMU timer platform data. All of the SH timers use a roughly identical structure for platform data, which presently is broken out for each block. Consolidate all of these definitions, as there is no reason for them to be broken out in the first place. Signed-off-by: Paul Mundt --- include/linux/sh_cmt.h | 13 ------------- include/linux/sh_mtu2.h | 12 ------------ include/linux/sh_timer.h | 13 +++++++++++++ include/linux/sh_tmu.h | 13 ------------- 4 files changed, 13 insertions(+), 38 deletions(-) delete mode 100644 include/linux/sh_cmt.h delete mode 100644 include/linux/sh_mtu2.h create mode 100644 include/linux/sh_timer.h delete mode 100644 include/linux/sh_tmu.h (limited to 'include') diff --git a/include/linux/sh_cmt.h b/include/linux/sh_cmt.h deleted file mode 100644 index 68cacde5954f..000000000000 --- a/include/linux/sh_cmt.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __SH_CMT_H__ -#define __SH_CMT_H__ - -struct sh_cmt_config { - char *name; - unsigned long channel_offset; - int timer_bit; - char *clk; - unsigned long clockevent_rating; - unsigned long clocksource_rating; -}; - -#endif /* __SH_CMT_H__ */ diff --git a/include/linux/sh_mtu2.h b/include/linux/sh_mtu2.h deleted file mode 100644 index a98876340ff4..000000000000 --- a/include/linux/sh_mtu2.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __SH_MTU2_H__ -#define __SH_MTU2_H__ - -struct sh_mtu2_config { - char *name; - int channel_offset; - int timer_bit; - char *clk; - unsigned long clockevent_rating; -}; - -#endif /* __SH_MTU2_H__ */ diff --git a/include/linux/sh_timer.h b/include/linux/sh_timer.h new file mode 100644 index 000000000000..864bd56bd3b0 --- /dev/null +++ b/include/linux/sh_timer.h @@ -0,0 +1,13 @@ +#ifndef __SH_TIMER_H__ +#define __SH_TIMER_H__ + +struct sh_timer_config { + char *name; + long channel_offset; + int timer_bit; + char *clk; + unsigned long clockevent_rating; + unsigned long clocksource_rating; +}; + +#endif /* __SH_TIMER_H__ */ diff --git a/include/linux/sh_tmu.h b/include/linux/sh_tmu.h deleted file mode 100644 index b1195e8a37d0..000000000000 --- a/include/linux/sh_tmu.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __SH_TMU_H__ -#define __SH_TMU_H__ - -struct sh_tmu_config { - char *name; - unsigned long channel_offset; - int timer_bit; - char *clk; - unsigned long clockevent_rating; - unsigned long clocksource_rating; -}; - -#endif /* __SH_TMU_H__ */ -- cgit v1.2.3 From 0d905bca23aca5c86a10ee101bcd3b1abbd40b25 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 May 2009 19:13:30 +0200 Subject: perf_counter: initialize the per-cpu context earlier percpu scheduling for perfcounters wants to take the context lock, but that lock first needs to be initialized. Currently it is an early_initcall() - but that is too late, the task tick runs much sooner than that. Call it explicitly from the scheduler init sequence instead. [ Impact: fix access-before-init crash ] LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f776851f8c4b..a356fa69796c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -573,6 +573,8 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_priv; +extern void perf_counter_init(void); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -600,9 +602,10 @@ perf_counter_mmap(unsigned long addr, unsigned long len, static inline void perf_counter_munmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file) { } + unsigned long pgoff, struct file *file) { } static inline void perf_counter_comm(struct task_struct *tsk) { } +static inline void perf_counter_init(void) { } #endif #endif /* __KERNEL__ */ -- cgit v1.2.3 From bbd993077d788589a86a718ba7a7895ba5e71a17 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 May 2009 10:27:38 +0100 Subject: ASoC: Remove redundant codec pointer from DAIs The DAI structure has two pointers to the codec, one in the body of the DAI and one in a union for a parent pointer. Drop the parent pointer version. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index a997c2cac63f..496dc30457b7 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -205,11 +205,8 @@ struct snd_soc_dai { /* DAI private data */ void *private_data; - /* parent codec/platform */ - union { - struct snd_soc_codec *codec; - struct snd_soc_platform *platform; - }; + /* parent platform */ + struct snd_soc_platform *platform; struct list_head list; }; -- cgit v1.2.3 From 4bbe1ddf89a5ba3ec30fe5980912d8bda3a3cbb2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 13 Oct 2008 03:07:14 +0200 Subject: ALSA: Add extra delay count in PCM Added runtime->delay field to adjust the delayed samples for snd_pcm_delay(). Typically a hardware FIFO length is stored in this field, so that the extra delay between hwptr and applptr can be computed. Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index c17296891617..267effddb070 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -270,6 +270,7 @@ struct snd_pcm_runtime { snd_pcm_uframes_t hw_ptr_base; /* Position at buffer restart */ snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */ unsigned long hw_ptr_jiffies; /* Time when hw_ptr is updated */ + snd_pcm_sframes_t delay; /* extra delay; typically FIFO size */ /* -- HW params -- */ snd_pcm_access_t access; /* access mode */ -- cgit v1.2.3 From f0d2c681ac0a85142fc8abe65fc33fcad35cb9b7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Apr 2009 13:43:37 -0400 Subject: ring-buffer: add counters for commit overrun and nmi dropped entries The WARN_ON in the ring buffer when a commit is preempted and the buffer is filled by preceding writes can happen in normal operations. The WARN_ON makes it look like a bug, not to mention, because it does not stop tracing and calls printk which can also recurse, this is prone to deadlock (the WARN_ON is not in a position to recurse). This patch removes the WARN_ON and replaces it with a counter that can be retrieved by a tracer. This counter is called commit_overrun. While at it, I added a nmi_dropped counter to count any time an NMI entry is dropped because the NMI could not take the spinlock. [ Impact: prevent deadlock by printing normal case warning ] Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 1c2f80911fbe..f1345828c7c5 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -153,6 +153,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer); unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, -- cgit v1.2.3 From c66de4a5be7913247bd83d79168f8e4420c9cfbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 May 2009 17:50:22 +0200 Subject: perf_counter: uncouple data_head updates from wakeups Keep data_head up-to-date irrespective of notifications. This fixes the case where you disable a counter and don't get a notification for the last few pending events, and it also allows polling usage. [ Impact: increase precision of perfcounter mmap-ed fields ] Suggested-by: Corey Ashford Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20090505155436.925084300@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a356fa69796c..17b63105f2aa 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -362,9 +362,11 @@ struct perf_mmap_data { atomic_t head; /* write position */ atomic_t events; /* event limit */ - atomic_t wakeup_head; /* completed head */ + atomic_t done_head; /* completed head */ atomic_t lock; /* concurrent writes */ + atomic_t wakeup; /* needs a wakeup */ + struct perf_counter_mmap_page *user_page; void *data_pages[0]; }; -- cgit v1.2.3 From 6de6a7b95705b859b61430fa3afa1403034eb3e6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 May 2009 17:50:23 +0200 Subject: perf_counter: add ioctl(PERF_COUNTER_IOC_RESET) Provide a way to reset an existing counter - this eases PAPI libraries around perfcounters. Similar to read() it doesn't collapse pending child counters. [ Impact: new perfcounter fd ioctl method to reset counters ] Suggested-by: Corey Ashford Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20090505155437.022272933@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 17b63105f2aa..0fcbf34a4f73 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -160,6 +160,7 @@ struct perf_counter_hw_event { #define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) #define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) #define PERF_COUNTER_IOC_REFRESH _IOW('$', 2, u32) +#define PERF_COUNTER_IOC_RESET _IO ('$', 3) /* * Structure of the page that can be mapped via mmap -- cgit v1.2.3 From c5078f78b455fbf67ea71442c7e7ca8acf9ff095 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 May 2009 17:50:24 +0200 Subject: perf_counter: provide an mlock threshold Provide a threshold to relax the mlock accounting, increasing usability. Each counter gets perf_counter_mlock_kb for free. [ Impact: allow more mmap buffering ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090505155437.112113632@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0fcbf34a4f73..00081d84169f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -358,6 +358,7 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; int nr_pages; /* nr of data pages */ + int nr_locked; /* nr pages mlocked */ atomic_t poll; /* POLL_ for wakeups */ atomic_t head; /* write position */ @@ -575,6 +576,7 @@ struct perf_callchain_entry { extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_priv; +extern int sysctl_perf_counter_mlock; extern void perf_counter_init(void); -- cgit v1.2.3 From 2df75e415709ad12862028916c772c1f377f6a7c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 6 May 2009 10:33:04 +0800 Subject: tracing/events: fix memory leak when unloading module When unloading a module, memory allocated by init_preds() and trace_define_field() is not freed. [ Impact: fix memory leak ] Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <4A00F6E0.3040503@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5fff40c9ff59..662c1becf367 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -116,6 +116,7 @@ struct ftrace_event_call { #define MAX_FILTER_STR_VAL 128 extern int init_preds(struct ftrace_event_call *call); +extern void destroy_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); extern int filter_current_check_discard(struct ftrace_event_call *call, void *rec, -- cgit v1.2.3 From de1d7286060430e79a1d50ad6e5fee8fe863c5f6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 5 May 2009 16:49:59 +0800 Subject: tracepoint: trace_sched_migrate_task(): remove parameter The orig_cpu parameter in trace_sched_migrate_task() is not necessary, it can be got by using task_cpu(p) in the probe. [ Impact: micro-optimization ] Signed-off-by: Mathieu Desnoyers [ modified from Mathieu's patch. The original patch is at: http://marc.info/?l=linux-kernel&m=123791201716239&w=2 ] Signed-off-by: Xiao Guangrong Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: Li Zefan Cc: zhaolei@cn.fujitsu.com Cc: laijs@cn.fujitsu.com LKML-Reference: <49FFFDB7.1050402@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index ffa1cab586b9..dd4033cf5b09 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -180,9 +180,9 @@ TRACE_EVENT(sched_switch, */ TRACE_EVENT(sched_migrate_task, - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + TP_PROTO(struct task_struct *p, int dest_cpu), - TP_ARGS(p, orig_cpu, dest_cpu), + TP_ARGS(p, dest_cpu), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -196,7 +196,7 @@ TRACE_EVENT(sched_migrate_task, memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; + __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; ), -- cgit v1.2.3 From a42aaa3bbce85ac487ad4fad5db99e8e91b7aac1 Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Mon, 4 May 2009 16:27:26 -0400 Subject: blktrace: correct remap names This attempts to clarify names utilized during block I/O remap operations (partition, volume manager). It correctly matches up the /from/ information for both device & sector. This takes in the concept from Kosaki Motohiro and extends it to include better naming for the "device_from" field. [ Impact: cleanup ] Signed-off-by: Alan D. Brunelle Reviewed-by: Li Zefan Reviewed-by: KOSAKI Motohiro Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <49FF4FAE.3000301@hp.com> Signed-off-by: Ingo Molnar --- include/linux/blktrace_api.h | 4 ++-- include/trace/block.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 62763c952854..82b4636030e9 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -116,9 +116,9 @@ struct blk_io_trace { * The remap event */ struct blk_io_trace_remap { - __be32 device; __be32 device_from; - __be64 sector; + __be32 device_to; + __be64 sector_from; }; enum { diff --git a/include/trace/block.h b/include/trace/block.h index 25b7068b819e..87f6456fd32e 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -70,7 +70,7 @@ DECLARE_TRACE(block_split, DECLARE_TRACE(block_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from, sector_t to), - TP_ARGS(q, bio, dev, from, to)); + sector_t to, sector_t from), + TP_ARGS(q, bio, dev, to, from)); #endif -- cgit v1.2.3 From 22a7c31a9659deaddafbbcec6562d44141e84474 Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Mon, 4 May 2009 16:35:08 -0400 Subject: blktrace: from-sector redundant in trace_block_remap Remove redundant from-sector parameter: it's /always/ the bio's sector passed in. [ Impact: cleanup ] Signed-off-by: Alan D. Brunelle Reviewed-by: Li Zefan Reviewed-by: KOSAKI Motohiro Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <49FF517C.7000503@hp.com> Signed-off-by: Ingo Molnar --- include/trace/block.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/block.h b/include/trace/block.h index 87f6456fd32e..8ac945b7746e 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -70,7 +70,7 @@ DECLARE_TRACE(block_split, DECLARE_TRACE(block_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t to, sector_t from), - TP_ARGS(q, bio, dev, to, from)); + sector_t to), + TP_ARGS(q, bio, dev, to)); #endif -- cgit v1.2.3 From 08ce4c91e44d51bb6c946f2756825a462d53c545 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 7 Apr 2009 23:40:39 +0200 Subject: dlm: Make name input parameter of {,dlm_}new_lockspace() const | fs/gfs2/lock_dlm.c:207: warning: passing argument 1 of 'dlm_new_lockspace' discards qualifiers from pointer target type Signed-off-by: Geert Uytterhoeven Signed-off-by: David Teigland --- include/linux/dlm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index b9cd38603fd8..0b3518c42356 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -81,8 +81,8 @@ struct dlm_lksb { * the cluster, the calling node joins it. */ -int dlm_new_lockspace(char *name, int namelen, dlm_lockspace_t **lockspace, - uint32_t flags, int lvblen); +int dlm_new_lockspace(const char *name, int namelen, + dlm_lockspace_t **lockspace, uint32_t flags, int lvblen); /* * dlm_release_lockspace -- cgit v1.2.3 From 9080b72819650c3a757d173a19bc930d603b79d6 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 21 Jan 2009 15:13:58 +0000 Subject: sh-sci: remove early_sci_setup() Remove unused early_sci_setup() function from sh-sci. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- include/linux/serial_sci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 893cc53486bc..717059d6791f 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -27,6 +27,4 @@ struct plat_sci_port { upf_t flags; /* UPF_* flags */ }; -int early_sci_setup(struct uart_port *port); - #endif /* __LINUX_SERIAL_SCI_H */ -- cgit v1.2.3 From 501b825d01efb93766c87d29f299851152cf4eb0 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 21 Jan 2009 15:14:30 +0000 Subject: sh-sci: improve clock framework support Use enable/disable hooks for clock framework integration. Make sure we control the clock for the serial console as well. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- include/linux/serial_sci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 717059d6791f..1c297ddc9d5a 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -25,6 +25,7 @@ struct plat_sci_port { unsigned int irqs[SCIx_NR_IRQS]; /* ERI, RXI, TXI, BRI */ unsigned int type; /* SCI / SCIF / IRDA */ upf_t flags; /* UPF_* flags */ + char *clk; /* clock string */ }; #endif /* __LINUX_SERIAL_SCI_H */ -- cgit v1.2.3 From 3df5edad87a998273aa5a9a8c728c05d855ad00e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 May 2009 18:52:22 +0200 Subject: perf_counter: rework ioctl()s Corey noticed that ioctl()s on grouped counters didn't work on the whole group. This extends the ioctl() interface to take a second argument that is interpreted as a flags field. We then provide PERF_IOC_FLAG_GROUP to toggle the behaviour. Having this flag gives the greatest flexibility, allowing you to individually enable/disable/reset counters in a group, or all together. [ Impact: fix group counter enable/disable semantics ] Reported-by: Corey Ashford Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20090508170028.837558214@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 00081d84169f..88f863ec2748 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -157,10 +157,14 @@ struct perf_counter_hw_event { /* * Ioctls that can be done on a perf counter fd: */ -#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) +#define PERF_COUNTER_IOC_ENABLE _IOW('$', 0, u32) +#define PERF_COUNTER_IOC_DISABLE _IOW('$', 1, u32) #define PERF_COUNTER_IOC_REFRESH _IOW('$', 2, u32) -#define PERF_COUNTER_IOC_RESET _IO ('$', 3) +#define PERF_COUNTER_IOC_RESET _IOW('$', 3, u32) + +enum perf_counter_ioc_flags { + PERF_IOC_FLAG_GROUP = 1U << 0, +}; /* * Structure of the page that can be mapped via mmap -- cgit v1.2.3 From a85f61abe11a46553c4562e74edb27ebc782aeb7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 May 2009 18:52:23 +0200 Subject: perf_counter: add PERF_RECORD_CONFIG Much like CONFIG_RECORD_GROUP records the hw_event.config to identify the values, allow to record this for all counters. [ Impact: extend perfcounter output record format ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090508170028.923228280@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 88f863ec2748..0e6303d36c66 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -104,6 +104,7 @@ enum perf_counter_record_format { PERF_RECORD_ADDR = 1U << 3, PERF_RECORD_GROUP = 1U << 4, PERF_RECORD_CALLCHAIN = 1U << 5, + PERF_RECORD_CONFIG = 1U << 6, }; /* @@ -258,6 +259,7 @@ enum perf_event_type { * { u32 pid, tid; } && PERF_RECORD_TID * { u64 time; } && PERF_RECORD_TIME * { u64 addr; } && PERF_RECORD_ADDR + * { u64 config; } && PERF_RECORD_CONFIG * * { u64 nr; * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP -- cgit v1.2.3 From f370e1e2f195ec1e6420e26fc83e0319595db578 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 May 2009 18:52:24 +0200 Subject: perf_counter: add PERF_RECORD_CPU Allow recording the CPU number the event was generated on. RFC: this leaves a u32 as reserved, should we fill in the node_id() there, or leave this open for future extention, as userspace can already easily do the cpu->node mapping if needed. [ Impact: extend perfcounter output record format ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090508170029.008627711@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0e6303d36c66..614f921d616a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -105,6 +105,7 @@ enum perf_counter_record_format { PERF_RECORD_GROUP = 1U << 4, PERF_RECORD_CALLCHAIN = 1U << 5, PERF_RECORD_CONFIG = 1U << 6, + PERF_RECORD_CPU = 1U << 7, }; /* @@ -260,6 +261,7 @@ enum perf_event_type { * { u64 time; } && PERF_RECORD_TIME * { u64 addr; } && PERF_RECORD_ADDR * { u64 config; } && PERF_RECORD_CONFIG + * { u32 cpu, res; } && PERF_RECORD_CPU * * { u64 nr; * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP -- cgit v1.2.3 From 4671c79408a3f8a5a6a45e39c4c164dada3a5678 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 8 May 2009 16:27:41 -0400 Subject: tracing: add trace_set_clr_event to export event enabling function Other parts of the kernel may need to be able to enable or disable specific events. Especially parts that create trace events. [ Impact: allow enabling of trace events by those that create the event ] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 662c1becf367..bae51ddfabd3 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -127,6 +127,8 @@ extern int trace_define_field(struct ftrace_event_call *call, char *type, #define is_signed_type(type) (((type)(-1)) < 0) +int trace_set_clr_event(const char *system, const char *event, int set); + /* * The double __builtin_constant_p is because gcc will give us an error * if we try to allocate the static variable to fmt if it is not a -- cgit v1.2.3 From 5e751e992f3fb08ba35e1ca8095ec8fbf9eda523 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 8 May 2009 13:55:22 +0100 Subject: CRED: Rename cred_exec_mutex to reflect that it's a guard against ptrace Rename cred_exec_mutex to reflect that it's a guard against foreign intervention on a process's credential state, such as is made by ptrace(). The attachment of a debugger to a process affects execve()'s calculation of the new credential state - _and_ also setprocattr()'s calculation of that state. Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/init_task.h | 4 ++-- include/linux/sched.h | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d87247d2641f..7f54ba942429 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -145,8 +145,8 @@ extern struct cred init_cred; .group_leader = &tsk, \ .real_cred = &init_cred, \ .cred = &init_cred, \ - .cred_exec_mutex = \ - __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ + .cred_guard_mutex = \ + __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3fa82b353c98..5932ace22400 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1247,7 +1247,9 @@ struct task_struct { * credentials (COW) */ const struct cred *cred; /* effective (overridable) subjective task * credentials (COW) */ - struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock -- cgit v1.2.3 From c3a4d78c580de4edc9ef0f7c59812fb02ceb037f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:37 +0900 Subject: block: add rq->resid_len rq->data_len served two purposes - the length of data buffer on issue and the residual count on completion. This duality creates some headaches. First of all, block layer and low level drivers can't really determine what rq->data_len contains while a request is executing. It could be the total request length or it coulde be anything else one of the lower layers is using to keep track of residual count. This complicates things because blk_rq_bytes() and thus [__]blk_end_request_all() relies on rq->data_len for PC commands. Drivers which want to report residual count should first cache the total request length, update rq->data_len and then complete the request with the cached data length. Secondly, it makes requests default to reporting full residual count, ie. reporting that no data transfer occurred. The residual count is an exception not the norm; however, the driver should clear rq->data_len to zero to signify the normal cases while leaving it alone means no data transfer occurred at all. This reverse default behavior complicates code unnecessarily and renders block PC on some drivers (ide-tape/floppy) unuseable. This patch adds rq->resid_len which is used only for residual count. While at it, remove now unnecessasry blk_rq_bytes() caching in ide_pc_intr() as rq->data_len is not changed anymore. Boaz : spotted missing conversion in osd Sergei : spotted too early conversion to blk_rq_bytes() in ide-tape [ Impact: cleanup residual count handling, report 0 resid by default ] Signed-off-by: Tejun Heo Cc: James Bottomley Cc: Bartlomiej Zolnierkiewicz Cc: Borislav Petkov Cc: Sergei Shtylyov Cc: Mike Miller Cc: Eric Moore Cc: Alan Stern Cc: FUJITA Tomonori Cc: Doug Gilbert Cc: Mike Miller Cc: Eric Moore Cc: Darrick J. Wong Cc: Pete Zaitcev Cc: Boaz Harrosh Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3a5b1bd6582c..6a967cad89fa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -229,6 +229,7 @@ struct request { unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; + unsigned int resid_len; /* residual count */ void *sense; unsigned long deadline; -- cgit v1.2.3 From 5b93629b4509c03ffa87a9316412fedf6f58cb37 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:38 +0900 Subject: block: implement blk_rq_pos/[cur_]sectors() and convert obvious ones Implement accessors - blk_rq_pos(), blk_rq_sectors() and blk_rq_cur_sectors() which return rq->hard_sector, rq->hard_nr_sectors and rq->hard_cur_sectors respectively and convert direct references of the said fields to the accessors. This is in preparation of request data length handling cleanup. Geert : suggested adding const to struct request * parameter to accessors Sergei : spotted error in patch description [ Impact: cleanup ] Signed-off-by: Tejun Heo Acked-by: Geert Uytterhoeven Acked-by: Stephen Rothwell Tested-by: Grant Likely Acked-by: Grant Likely Ackec-by: Sergei Shtylyov Cc: Bartlomiej Zolnierkiewicz Cc: Borislav Petkov Cc: James Bottomley Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6a967cad89fa..4e5f85598728 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -832,13 +832,30 @@ static inline void blk_run_address_space(struct address_space *mapping) extern void blkdev_dequeue_request(struct request *req); /* - * blk_end_request() takes bytes instead of sectors as a complete size. - * blk_rq_bytes() returns bytes left to complete in the entire request. - * blk_rq_cur_bytes() returns bytes left to complete in the current segment. + * blk_rq_pos() : the current sector + * blk_rq_bytes() : bytes left in the entire request + * blk_rq_cur_bytes() : bytes left in the current segment + * blk_rq_sectors() : sectors left in the entire request + * blk_rq_cur_sectors() : sectors left in the current segment */ +static inline sector_t blk_rq_pos(const struct request *rq) +{ + return rq->hard_sector; +} + extern unsigned int blk_rq_bytes(struct request *rq); extern unsigned int blk_rq_cur_bytes(struct request *rq); +static inline unsigned int blk_rq_sectors(const struct request *rq) +{ + return rq->hard_nr_sectors; +} + +static inline unsigned int blk_rq_cur_sectors(const struct request *rq) +{ + return rq->hard_cur_sectors; +} + /* * Request completion related functions. * -- cgit v1.2.3 From 83096ebf1263b2c1ee5e653ba37d993d02e3eb7b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:39 +0900 Subject: block: convert to pos and nr_sectors accessors With recent cleanups, there is no place where low level driver directly manipulates request fields. This means that the 'hard' request fields always equal the !hard fields. Convert all rq->sectors, nr_sectors and current_nr_sectors references to accessors. While at it, drop superflous blk_rq_pos() < 0 test in swim.c. [ Impact: use pos and nr_sectors accessors ] Signed-off-by: Tejun Heo Acked-by: Geert Uytterhoeven Tested-by: Grant Likely Acked-by: Grant Likely Tested-by: Adrian McMenamin Acked-by: Adrian McMenamin Acked-by: Mike Miller Cc: James Bottomley Cc: Bartlomiej Zolnierkiewicz Cc: Borislav Petkov Cc: Sergei Shtylyov Cc: Eric Moore Cc: Alan Stern Cc: FUJITA Tomonori Cc: Pete Zaitcev Cc: Stephen Rothwell Cc: Paul Clements Cc: Tim Waugh Cc: Jeff Garzik Cc: Jeremy Fitzhardinge Cc: Alex Dubov Cc: David Woodhouse Cc: Martin Schwidefsky Cc: Dario Ballabio Cc: David S. Miller Cc: Rusty Russell Cc: unsik Kim Cc: Laurent Vivier Signed-off-by: Jens Axboe --- include/scsi/scsi_cmnd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 43b50d36925c..3878d1dc7f59 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -270,7 +270,7 @@ static inline unsigned char scsi_get_prot_type(struct scsi_cmnd *scmd) static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd) { - return scmd->request->sector; + return blk_rq_pos(scmd->request); } static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd) -- cgit v1.2.3 From 2e46e8b27aa57c6bd34b3102b40ee4d0144b4fab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:41 +0900 Subject: block: drop request->hard_* and *nr_sectors struct request has had a few different ways to represent some properties of a request. ->hard_* represent block layer's view of the request progress (completion cursor) and the ones without the prefix are supposed to represent the issue cursor and allowed to be updated as necessary by the low level drivers. The thing is that as block layer supports partial completion, the two cursors really aren't necessary and only cause confusion. In addition, manual management of request detail from low level drivers is cumbersome and error-prone at the very least. Another interesting duplicate fields are rq->[hard_]nr_sectors and rq->{hard_cur|current}_nr_sectors against rq->data_len and rq->bio->bi_size. This is more convoluted than the hard_ case. rq->[hard_]nr_sectors are initialized for requests with bio but blk_rq_bytes() uses it only for !pc requests. rq->data_len is initialized for all request but blk_rq_bytes() uses it only for pc requests. This causes good amount of confusion throughout block layer and its drivers and determining the request length has been a bit of black magic which may or may not work depending on circumstances and what the specific LLD is actually doing. rq->{hard_cur|current}_nr_sectors represent the number of sectors in the contiguous data area at the front. This is mainly used by drivers which transfers data by walking request segment-by-segment. This value always equals rq->bio->bi_size >> 9. However, data length for pc requests may not be multiple of 512 bytes and using this field becomes a bit confusing. In general, having multiple fields to represent the same property leads only to confusion and subtle bugs. With recent block low level driver cleanups, no driver is accessing or manipulating these duplicate fields directly. Drop all the duplicates. Now rq->sector means the current sector, rq->data_len the current total length and rq->bio->bi_size the current segment length. Everything else is defined in terms of these three and available only through accessors. * blk_recalc_rq_sectors() is collapsed into blk_update_request() and now handles pc and fs requests equally other than rq->sector update. This means that now pc requests can use partial completion too (no in-kernel user yet tho). * bio_cur_sectors() is replaced with bio_cur_bytes() as block layer now uses byte count as the primary data length. * blk_rq_pos() is now guranteed to be always correct. In-block users converted. * blk_rq_bytes() is now guaranteed to be always valid as is blk_rq_sectors(). In-block users converted. * blk_rq_sectors() is now guaranteed to equal blk_rq_bytes() >> 9. More convenient one is used. * blk_rq_bytes() and blk_rq_cur_bytes() are now inlined and take const pointer to request. [ Impact: API cleanup, single way to represent one property of a request ] Signed-off-by: Tejun Heo Cc: Boaz Harrosh Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++--- include/linux/blkdev.h | 37 ++++++++++++++++--------------------- include/linux/elevator.h | 2 +- 3 files changed, 20 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index f37ca8c726ba..d30ec6f30dd7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -218,12 +218,12 @@ struct bio { #define bio_sectors(bio) ((bio)->bi_size >> 9) #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) -static inline unsigned int bio_cur_sectors(struct bio *bio) +static inline unsigned int bio_cur_bytes(struct bio *bio) { if (bio->bi_vcnt) - return bio_iovec(bio)->bv_len >> 9; + return bio_iovec(bio)->bv_len; else /* dataless requests such as discard */ - return bio->bi_size >> 9; + return bio->bi_size; } static inline void *bio_data(struct bio *bio) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4e5f85598728..ce2bf5efa9ba 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -166,19 +166,8 @@ struct request { enum rq_cmd_type_bits cmd_type; unsigned long atomic_flags; - /* Maintain bio traversal state for part by part I/O submission. - * hard_* are block layer internals, no driver should touch them! - */ - - sector_t sector; /* next sector to submit */ - sector_t hard_sector; /* next sector to complete */ - unsigned long nr_sectors; /* no. of sectors left to submit */ - unsigned long hard_nr_sectors; /* no. of sectors left to complete */ - /* no. of sectors left to submit in the current segment */ - unsigned int current_nr_sectors; - - /* no. of sectors left to complete in the current segment */ - unsigned int hard_cur_sectors; + sector_t sector; /* sector cursor */ + unsigned int data_len; /* total data len, don't access directly */ struct bio *bio; struct bio *biotail; @@ -226,7 +215,6 @@ struct request { unsigned char __cmd[BLK_MAX_CDB]; unsigned char *cmd; - unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; unsigned int resid_len; /* residual count */ @@ -840,20 +828,27 @@ extern void blkdev_dequeue_request(struct request *req); */ static inline sector_t blk_rq_pos(const struct request *rq) { - return rq->hard_sector; + return rq->sector; +} + +static inline unsigned int blk_rq_bytes(const struct request *rq) +{ + return rq->data_len; } -extern unsigned int blk_rq_bytes(struct request *rq); -extern unsigned int blk_rq_cur_bytes(struct request *rq); +static inline int blk_rq_cur_bytes(const struct request *rq) +{ + return rq->bio ? bio_cur_bytes(rq->bio) : 0; +} static inline unsigned int blk_rq_sectors(const struct request *rq) { - return rq->hard_nr_sectors; + return blk_rq_bytes(rq) >> 9; } static inline unsigned int blk_rq_cur_sectors(const struct request *rq) { - return rq->hard_cur_sectors; + return blk_rq_cur_bytes(rq) >> 9; } /* @@ -928,7 +923,7 @@ static inline void blk_end_request_all(struct request *rq, int error) */ static inline bool blk_end_request_cur(struct request *rq, int error) { - return blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); } /** @@ -981,7 +976,7 @@ static inline void __blk_end_request_all(struct request *rq, int error) */ static inline bool __blk_end_request_cur(struct request *rq, int error) { - return __blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); } extern void blk_complete_request(struct request *); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index c59b769f62b0..4e462878c9ca 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -171,7 +171,7 @@ enum { ELV_MQUEUE_MUST, }; -#define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) +#define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) /* -- cgit v1.2.3 From a2dec7b36364a5cc564c4d76cf16d2e7d33f5c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:44 +0900 Subject: block: hide request sector and data_len Block low level drivers for some reason have been pretty good at abusing block layer API. Especially struct request's fields tend to get violated in all possible ways. Make it clear that low level drivers MUST NOT access or manipulate rq->sector and rq->data_len directly by prefixing them with double underscores. This change is also necessary to break build of out-of-tree codes which assume the previous block API where internal fields can be manipulated and rq->data_len carries residual count on completion. [ Impact: hide internal fields, block API change ] Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ce2bf5efa9ba..c75580345700 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -166,8 +166,9 @@ struct request { enum rq_cmd_type_bits cmd_type; unsigned long atomic_flags; - sector_t sector; /* sector cursor */ - unsigned int data_len; /* total data len, don't access directly */ + /* the following two fields are internal, NEVER access directly */ + sector_t __sector; /* sector cursor */ + unsigned int __data_len; /* total data len */ struct bio *bio; struct bio *biotail; @@ -828,12 +829,12 @@ extern void blkdev_dequeue_request(struct request *req); */ static inline sector_t blk_rq_pos(const struct request *rq) { - return rq->sector; + return rq->__sector; } static inline unsigned int blk_rq_bytes(const struct request *rq) { - return rq->data_len; + return rq->__data_len; } static inline int blk_rq_cur_bytes(const struct request *rq) -- cgit v1.2.3 From 9934c8c04561413609d2bc38c6b9f268cba774a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 8 May 2009 11:54:16 +0900 Subject: block: implement and enforce request peek/start/fetch Till now block layer allowed two separate modes of request execution. A request is always acquired from the request queue via elv_next_request(). After that, drivers are free to either dequeue it or process it without dequeueing. Dequeue allows elv_next_request() to return the next request so that multiple requests can be in flight. Executing requests without dequeueing has its merits mostly in allowing drivers for simpler devices which can't do sg to deal with segments only without considering request boundary. However, the benefit this brings is dubious and declining while the cost of the API ambiguity is increasing. Segment based drivers are usually for very old or limited devices and as converting to dequeueing model isn't difficult, it doesn't justify the API overhead it puts on block layer and its more modern users. Previous patches converted all block low level drivers to dequeueing model. This patch completes the API transition by... * renaming elv_next_request() to blk_peek_request() * renaming blkdev_dequeue_request() to blk_start_request() * adding blk_fetch_request() which is combination of peek and start * disallowing completion of queued (not started) requests * applying new API to all LLDs Renamings are for consistency and to break out of tree code so that it's apparent that out of tree drivers need updating. [ Impact: block request issue API cleanup, no functional change ] Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: James Bottomley Cc: Mike Miller Cc: unsik Kim Cc: Paul Clements Cc: Tim Waugh Cc: Geert Uytterhoeven Cc: David S. Miller Cc: Laurent Vivier Cc: Jeff Garzik Cc: Jeremy Fitzhardinge Cc: Grant Likely Cc: Adrian McMenamin Cc: Stephen Rothwell Cc: Bartlomiej Zolnierkiewicz Cc: Borislav Petkov Cc: Sergei Shtylyov Cc: Alex Dubov Cc: Pierre Ossman Cc: David Woodhouse Cc: Markus Lidel Cc: Stefan Weinhuber Cc: Martin Schwidefsky Cc: Pete Zaitcev Cc: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++++-- include/linux/elevator.h | 2 -- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c75580345700..6e59d3b92ff2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -818,8 +818,6 @@ static inline void blk_run_address_space(struct address_space *mapping) blk_run_backing_dev(mapping->backing_dev_info, NULL); } -extern void blkdev_dequeue_request(struct request *req); - /* * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request @@ -852,6 +850,13 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } +/* + * Request issue related functions. + */ +extern struct request *blk_peek_request(struct request_queue *q); +extern void blk_start_request(struct request *rq); +extern struct request *blk_fetch_request(struct request_queue *q); + /* * Request completion related functions. * diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 4e462878c9ca..1cb3372e65d8 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -103,10 +103,8 @@ extern int elv_merge(struct request_queue *, struct request **, struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); extern void elv_merged_request(struct request_queue *, struct request *, int); -extern void elv_dequeue_request(struct request_queue *, struct request *); extern void elv_requeue_request(struct request_queue *, struct request *); extern int elv_queue_empty(struct request_queue *); -extern struct request *elv_next_request(struct request_queue *q); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); extern int elv_register_queue(struct request_queue *q); -- cgit v1.2.3 From 1822952ba2b9f22f79019d07ebbeca31dc14b718 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 11 May 2009 17:56:07 +0900 Subject: block: let blk_end_request_all handle bidi requests blk_end_request_all() and __blk_end_request_all() should finish all bytes including bidi, by definition. That's what all bidi users need , bidi requests must be complete as a whole (partial completion is impossible). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6e59d3b92ff2..1069f4483c6e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -910,8 +910,12 @@ static inline bool blk_end_request(struct request *rq, int error, static inline void blk_end_request_all(struct request *rq, int error) { bool pending; + unsigned int bidi_bytes = 0; - pending = blk_end_request(rq, error, blk_rq_bytes(rq)); + if (unlikely(blk_bidi_rq(rq))) + bidi_bytes = blk_rq_bytes(rq->next_rq); + + pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); BUG_ON(pending); } @@ -962,8 +966,12 @@ static inline bool __blk_end_request(struct request *rq, int error, static inline void __blk_end_request_all(struct request *rq, int error) { bool pending; + unsigned int bidi_bytes = 0; + + if (unlikely(blk_bidi_rq(rq))) + bidi_bytes = blk_rq_bytes(rq->next_rq); - pending = __blk_end_request(rq, error, blk_rq_bytes(rq)); + pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); BUG_ON(pending); } -- cgit v1.2.3 From b1f744937f1be3e6d3009382a755679133cf782d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 11 May 2009 17:56:09 +0900 Subject: block: move completion related functions back to blk-core.c Let's put the completion related functions back to block/blk-core.c where they have lived. We can also unexport blk_end_bidi_request() and __blk_end_bidi_request(), which nobody uses. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 128 ++++--------------------------------------------- 1 file changed, 8 insertions(+), 120 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1069f4483c6e..f9d60a78c08a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -872,126 +872,14 @@ extern struct request *blk_fetch_request(struct request_queue *q); */ extern bool blk_update_request(struct request *rq, int error, unsigned int nr_bytes); -extern bool blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, - unsigned int bidi_bytes); -extern bool __blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, - unsigned int bidi_bytes); - -/** - * blk_end_request - Helper function for drivers to complete the request. - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete - * - * Description: - * Ends I/O on a number of bytes attached to @rq. - * If @rq has leftover, sets it up for the next range of segments. - * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - **/ -static inline bool blk_end_request(struct request *rq, int error, - unsigned int nr_bytes) -{ - return blk_end_bidi_request(rq, error, nr_bytes, 0); -} - -/** - * blk_end_request_all - Helper function for drives to finish the request. - * @rq: the request to finish - * @err: %0 for success, < %0 for error - * - * Description: - * Completely finish @rq. - */ -static inline void blk_end_request_all(struct request *rq, int error) -{ - bool pending; - unsigned int bidi_bytes = 0; - - if (unlikely(blk_bidi_rq(rq))) - bidi_bytes = blk_rq_bytes(rq->next_rq); - - pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); - BUG_ON(pending); -} - -/** - * blk_end_request_cur - Helper function to finish the current request chunk. - * @rq: the request to finish the current chunk for - * @err: %0 for success, < %0 for error - * - * Description: - * Complete the current consecutively mapped chunk from @rq. - * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - */ -static inline bool blk_end_request_cur(struct request *rq, int error) -{ - return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); -} - -/** - * __blk_end_request - Helper function for drivers to complete the request. - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete - * - * Description: - * Must be called with queue lock held unlike blk_end_request(). - * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - **/ -static inline bool __blk_end_request(struct request *rq, int error, - unsigned int nr_bytes) -{ - return __blk_end_bidi_request(rq, error, nr_bytes, 0); -} - -/** - * __blk_end_request_all - Helper function for drives to finish the request. - * @rq: the request to finish - * @err: %0 for success, < %0 for error - * - * Description: - * Completely finish @rq. Must be called with queue lock held. - */ -static inline void __blk_end_request_all(struct request *rq, int error) -{ - bool pending; - unsigned int bidi_bytes = 0; - - if (unlikely(blk_bidi_rq(rq))) - bidi_bytes = blk_rq_bytes(rq->next_rq); - - pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); - BUG_ON(pending); -} - -/** - * __blk_end_request_cur - Helper function to finish the current request chunk. - * @rq: the request to finish the current chunk for - * @err: %0 for success, < %0 for error - * - * Description: - * Complete the current consecutively mapped chunk from @rq. Must - * be called with queue lock held. - * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - */ -static inline bool __blk_end_request_cur(struct request *rq, int error) -{ - return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); -} +extern bool blk_end_request(struct request *rq, int error, + unsigned int nr_bytes); +extern void blk_end_request_all(struct request *rq, int error); +extern bool blk_end_request_cur(struct request *rq, int error); +extern bool __blk_end_request(struct request *rq, int error, + unsigned int nr_bytes); +extern void __blk_end_request_all(struct request *rq, int error); +extern bool __blk_end_request_cur(struct request *rq, int error); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); -- cgit v1.2.3 From 79c5d3ce614d8fe706545c7bca2158b63db6bb5e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 11 May 2009 15:06:46 +0800 Subject: blktrace: from-sector redundant in trace_block_remap, cleanup The last argument of block_remap prober is the original sector before remap, so it should be 'from', not 'to'. [ Impact: clean up ] Signed-off-by: Li Zefan Cc: "Alan D. Brunelle" Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: KOSAKI Motohiro LKML-Reference: <4A07CE86.5090301@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/block.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/block.h b/include/trace/block.h index 8ac945b7746e..5b12efa096b6 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -70,7 +70,7 @@ DECLARE_TRACE(block_split, DECLARE_TRACE(block_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t to), - TP_ARGS(q, bio, dev, to)); + sector_t from), + TP_ARGS(q, bio, dev, from)); #endif -- cgit v1.2.3 From 6818173bd658439b83896a2a7586f64ab51bf29c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 May 2009 15:37:36 +0200 Subject: splice: implement default splice_read method If f_op->splice_read() is not implemented, fall back to a plain read. Use vfs_readv() to read into previously allocated pages. This will allow splice and functions using splice, such as the loop device, to work on all filesystems. This includes "direct_io" files in fuse which bypass the page cache. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 ++ include/linux/pipe_fs_i.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5bed436f4353..d926c2bea166 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2204,6 +2204,8 @@ extern int generic_segment_checks(const struct iovec *iov, /* fs/splice.c */ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); +extern ssize_t default_file_splice_read(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index c8f038554e80..b43a9e039059 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -152,5 +152,6 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); +void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); #endif -- cgit v1.2.3 From 2b1ccc0ee918a653d0483fdad9dd6112ce8e9043 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 May 2009 11:11:48 +0200 Subject: splice: fix misleading comment Splice is tied to pipes by design, it'll not change. And now that the splice stuff is in splice.h (and note pipe.h), the rest of the comment is out-of-date as well. Signed-off-by: Jens Axboe --- include/linux/splice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/splice.h b/include/linux/splice.h index 5f3faa9d15ae..18e7c7c0cae6 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -11,8 +11,7 @@ #include /* - * splice is tied to pipes as a transport (at least for now), so we'll just - * add the splice flags here. + * Flags passed in from splice/tee/vmsplice */ #define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ #define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */ -- cgit v1.2.3 From d34c43078236b41146877c49af341ab85b5fb8db Mon Sep 17 00:00:00 2001 From: Jon Smirl Date: Wed, 13 May 2009 21:59:14 -0400 Subject: ASoC: Add SNDRV_PCM_FMTBIT_S32_BE as a valid AC97 format Signed-off-by: Jon Smirl Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 496dc30457b7..352d7eee9b6d 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -79,7 +79,8 @@ struct snd_pcm_substream; #define SND_SOC_CLOCK_OUT 1 #define SND_SOC_STD_AC97_FMTS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S32_LE) + SNDRV_PCM_FMTBIT_S32_LE |\ + SNDRV_PCM_FMTBIT_S32_BE) struct snd_soc_dai_ops; struct snd_soc_dai; -- cgit v1.2.3 From 9fc20f030ba457d20eb994e1def7e2ce7d5ae1a8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 May 2009 15:14:18 +0200 Subject: ALSA: ctxfi - Move PCI ID definitions to linux/pci_ids.h Signed-off-by: Takashi Iwai --- include/linux/pci_ids.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 06ba90c211a5..619153138986 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1314,6 +1314,13 @@ #define PCI_VENDOR_ID_CREATIVE 0x1102 /* duplicate: ECTIVA */ #define PCI_DEVICE_ID_CREATIVE_EMU10K1 0x0002 +#define PCI_DEVICE_ID_CREATIVE_20K1 0x0005 +#define PCI_DEVICE_ID_CREATIVE_20K2 0x000b +#define PCI_SUBDEVICE_ID_CREATIVE_SB0760 0x0024 +#define PCI_SUBDEVICE_ID_CREATIVE_SB08801 0x0041 +#define PCI_SUBDEVICE_ID_CREATIVE_SB08802 0x0042 +#define PCI_SUBDEVICE_ID_CREATIVE_SB08803 0x0043 +#define PCI_SUBDEVICE_ID_CREATIVE_HENDRIX 0x6000 #define PCI_VENDOR_ID_ECTIVA 0x1102 /* duplicate: CREATIVE */ #define PCI_DEVICE_ID_ECTIVA_EV1938 0x8938 -- cgit v1.2.3 From 9e35ad388bea89f7d6f375af4c0ae98803688666 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 13 May 2009 16:21:38 +0200 Subject: perf_counter: Rework the perf counter disable/enable The current disable/enable mechanism is: token = hw_perf_save_disable(); ... /* do bits */ ... hw_perf_restore(token); This works well, provided that the use nests properly. Except we don't. x86 NMI/INT throttling has non-nested use of this, breaking things. Therefore provide a reference counter disable/enable interface, where the first disable disables the hardware, and the last enable enables the hardware again. [ Impact: refactor, simplify the PMU disable/enable logic ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 614f921d616a..e543ecc129f1 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -544,8 +544,10 @@ extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void perf_counter_unthrottle(void); -extern u64 hw_perf_save_disable(void); -extern void hw_perf_restore(u64 ctrl); +extern void __perf_disable(void); +extern bool __perf_enable(void); +extern void perf_disable(void); +extern void perf_enable(void); extern int perf_counter_task_disable(void); extern int perf_counter_task_enable(void); extern int hw_perf_group_sched_in(struct perf_counter *group_leader, @@ -600,8 +602,8 @@ static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } static inline void perf_counter_unthrottle(void) { } -static inline void hw_perf_restore(u64 ctrl) { } -static inline u64 hw_perf_save_disable(void) { return 0; } +static inline void perf_disable(void) { } +static inline void perf_enable(void) { } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } -- cgit v1.2.3 From 789f90fcf6b0b54e655740e9396c954378542c79 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 15 May 2009 15:19:27 +0200 Subject: perf_counter: per user mlock gift Instead of a per-process mlock gift for perf-counters, use a per-user gift so that there is less of a DoS potential. [ Impact: allow less worst-case unprivileged memory consumption ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <20090515132018.496182835@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index d1857580a132..ff59d1231519 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -674,6 +674,10 @@ struct user_struct { struct work_struct work; #endif #endif + +#ifdef CONFIG_PERF_COUNTERS + atomic_long_t locked_vm; +#endif }; extern int uids_sysfs_init(void); -- cgit v1.2.3 From 60db5e09c13109b13830cc9dcae688003fd39e79 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 15 May 2009 15:19:28 +0200 Subject: perf_counter: frequency based adaptive irq_period Instead of specifying the irq_period for a counter, provide a target interrupt frequency and dynamically adapt the irq_period to match this frequency. [ Impact: new perf-counter attribute/feature ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <20090515132018.646195868@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e543ecc129f1..004b6e162b96 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -130,7 +130,11 @@ struct perf_counter_hw_event { */ __u64 config; - __u64 irq_period; + union { + __u64 irq_period; + __u64 irq_freq; + }; + __u32 record_type; __u32 read_format; @@ -146,8 +150,9 @@ struct perf_counter_hw_event { mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ comm : 1, /* include comm data */ + freq : 1, /* use freq, not period */ - __reserved_1 : 52; + __reserved_1 : 51; __u32 extra_config_len; __u32 wakeup_events; /* wakeup every n events */ @@ -337,6 +342,7 @@ struct hw_perf_counter { atomic64_t prev_count; u64 irq_period; atomic64_t period_left; + u64 interrupts; #endif }; -- cgit v1.2.3 From dce48a84adf1806676319f6f480e30a6daa012f9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 11 Apr 2009 10:43:41 +0200 Subject: sched, timers: move calc_load() to scheduler Dimitri Sivanich noticed that xtime_lock is held write locked across calc_load() which iterates over all online CPUs. That can cause long latencies for xtime_lock readers on large SMP systems. The load average calculation is an rough estimate anyway so there is no real need to protect the readers vs. the update. It's not a problem when the avenrun array is updated while a reader copies the values. Instead of iterating over all online CPUs let the scheduler_tick code update the number of active tasks shortly before the avenrun update happens. The avenrun update itself is handled by the CPU which calls do_timer(). [ Impact: reduce xtime_lock write locked section ] Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc8049c..6eb4892efe45 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -135,8 +135,8 @@ DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); -extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); +extern void calc_global_load(void); extern unsigned long get_parent_ip(unsigned long addr); -- cgit v1.2.3 From 2d02494f5a90f2e4b3c4c6acc85ec94674cdc431 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 May 2009 20:08:52 +0200 Subject: sched, timers: cleanup avenrun users avenrun is an rough estimate so we don't have to worry about consistency of the three avenrun values. Remove the xtime lock dependency and provide a function to scale the values. Cleanup the users. [ Impact: cleanup ] Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6eb4892efe45..de7b3b217772 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -116,6 +116,7 @@ struct fs_struct; * 11 bit fractions. */ extern unsigned long avenrun[]; /* Load averages */ +extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); #define FSHIFT 11 /* nr of bits of precision */ #define FIXED_1 (1< Date: Thu, 14 May 2009 21:48:08 +1000 Subject: perf_counter: allow arch to supply event misc flags and instruction pointer At present the values we put in overflow events for the misc flags indicating processor mode and the instruction pointer are obtained using the standard user_mode() and instruction_pointer() functions. Those functions tell you where the performance monitor interrupt was taken, which might not be exactly where the counter overflow occurred, for example because interrupts were disabled at the point where the overflow occurred, or because the processor had many instructions in flight and chose to complete some more instructions beyond the one that caused the counter overflow. Some architectures (e.g. powerpc) can supply more precise information about where the counter overflow occurred and the processor mode at that point. This introduces new functions, perf_misc_flags() and perf_instruction_pointer(), which arch code can override to provide more precise information if available. They have default implementations which are identical to the existing code. This also adds a new misc flag value, PERF_EVENT_MISC_HYPERVISOR, for the case where a counter overflow occurred in the hypervisor. We encode the processor mode in the 2 bits previously used to indicate user or kernel mode; the values for user and kernel mode are unchanged and hypervisor mode is indicated by both bits being set. [ Impact: generalize perfcounter core facilities ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <18956.1272.818511.561835@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 004b6e162b96..c8c1dfc22c93 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -215,8 +215,11 @@ struct perf_counter_mmap_page { __u32 data_head; /* head in the data section */ }; +#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) +#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (1 << 1) +#define PERF_EVENT_MISC_USER (2 << 0) +#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) #define PERF_EVENT_MISC_OVERFLOW (1 << 2) struct perf_event_header { @@ -596,6 +599,12 @@ extern int sysctl_perf_counter_mlock; extern void perf_counter_init(void); +#ifndef perf_misc_flags +#define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ + PERF_EVENT_MISC_KERNEL) +#define perf_instruction_pointer(regs) instruction_pointer(regs) +#endif + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } -- cgit v1.2.3 From 9a1a69a1f41cbefebf3172761f197db6aba71e68 Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Wed, 29 Apr 2009 13:12:39 -0500 Subject: [SCSI] fc-transport: Close state transition-window during rport deletion. Andrew Vasquez wrote: > fc-transport: Close state transition-window during rport deletion. > > After an rport's state has transitioned to FC_PORTSTATE_BLOCKED, > but, prior to making the upcall to 'block' the scsi-target > associated with an rport, queued commands can recycle and > ultimately run out of retries causing failures to propagate to > upper-level drivers. Close this transition-window by returning > the non-'retries' modifying DID_IMM_RETRY status for submitted > I/Os. The same can happen for iscsi when transitioning from logged in to failed and blocking the sdevs. This patch converts iscsi and fc's transitions back to use DID_IMM_RETRY instead of DID_TRANSPORT_DISRUPTED which has a limited number of retries that we do not want to use for handling this race. Signed-off-by: Andrew Vasquez [Addition of iscsi and fc port online devloss case conversion by Mike Christie] Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/scsi_transport_fc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index c9184f756cad..68a8d873bbd9 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -680,7 +680,7 @@ fc_remote_port_chkready(struct fc_rport *rport) if (rport->roles & FC_PORT_ROLE_FCP_TARGET) result = 0; else if (rport->flags & FC_RPORT_DEVLOSS_PENDING) - result = DID_TRANSPORT_DISRUPTED << 16; + result = DID_IMM_RETRY << 16; else result = DID_NO_CONNECT << 16; break; @@ -688,7 +688,7 @@ fc_remote_port_chkready(struct fc_rport *rport) if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT) result = DID_TRANSPORT_FAILFAST << 16; else - result = DID_TRANSPORT_DISRUPTED << 16; + result = DID_IMM_RETRY << 16; break; default: result = DID_NO_CONNECT << 16; -- cgit v1.2.3 From ca1b96e00ab5d1b0838965834469a0284c81a517 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 17 May 2009 19:12:21 +0200 Subject: ide: replace special_t typedef by IDE_SFLAG_* flags Replace: - special_t typedef by IDE_SFLAG_* flags - 'special_t special' ide_drive_t's field by 'u8 special_flags' one There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 34c128f0a33c..fc61328a4cdb 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -218,21 +218,12 @@ static inline void ide_std_init_ports(hw_regs_t *hw, /* * Special Driver Flags - * - * set_geometry : respecify drive geometry - * recalibrate : seek to cyl 0 - * set_multmode : set multmode count - * reserved : unused */ -typedef union { - unsigned all : 8; - struct { - unsigned set_geometry : 1; - unsigned recalibrate : 1; - unsigned set_multmode : 1; - unsigned reserved : 5; - } b; -} special_t; +enum { + IDE_SFLAG_SET_GEOMETRY = (1 << 0), + IDE_SFLAG_RECALIBRATE = (1 << 1), + IDE_SFLAG_SET_MULTMODE = (1 << 2), +}; /* * Status returned from various ide_ functions @@ -530,7 +521,7 @@ struct ide_drive_s { unsigned long sleep; /* sleep until this time */ unsigned long timeout; /* max time to wait for irq */ - special_t special; /* special action flags */ + u8 special_flags; /* special action flags */ u8 select; /* basic drive/head select reg value */ u8 retry_pio; /* retrying dma capable host in pio */ -- cgit v1.2.3 From 29e52cf793ded6bece50de50e738596f94f07d9f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 17 May 2009 19:12:22 +0200 Subject: ide: remove chipset field from hw_regs_t * Convert host drivers that still use hw_regs_t's chipset field to use the one in struct ide_port_info instead. * Move special handling of ide_pci chipset type from ide_hw_configure() to ide_init_port(). * Remove chipset field from hw_regs_t. While at it: - remove stale comment in delkin_cb.c There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index fc61328a4cdb..9652edbd26af 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -186,7 +186,6 @@ typedef struct hw_regs_s { int irq; /* our irq number */ ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ - hwif_chipset_t chipset; struct device *dev, *parent; unsigned long config; } hw_regs_t; -- cgit v1.2.3 From dca3983059a4481e4ae97bbf0ac4b4c21429e1a5 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 17 May 2009 19:12:24 +0200 Subject: ide: pass number of ports to ide_host_{alloc,add}() (v2) Pass number of ports to ide_host_{alloc,add}() and then update all users accordingly. v2: - drop no longer needed NULL initializers in buddha.c, cmd640.c and gayle.c (noticed by Sergei) There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 9652edbd26af..a3cd568553d3 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1456,11 +1456,12 @@ void ide_undecoded_slave(ide_drive_t *); void ide_port_apply_params(ide_hwif_t *); int ide_sysfs_register_port(ide_hwif_t *); -struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **); +struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **, + unsigned int); void ide_host_free(struct ide_host *); int ide_host_register(struct ide_host *, const struct ide_port_info *, hw_regs_t **); -int ide_host_add(const struct ide_port_info *, hw_regs_t **, +int ide_host_add(const struct ide_port_info *, hw_regs_t **, unsigned int, struct ide_host **); void ide_host_remove(struct ide_host *); int ide_legacy_device_add(const struct ide_port_info *, unsigned long); -- cgit v1.2.3 From 9f36d31437922354d104a2db407f397e79e4027e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 17 May 2009 19:12:25 +0200 Subject: ide: remove hw_regs_t typedef Remove hw_regs_t typedef and rename struct hw_regs_s to struct ide_hw. There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index a3cd568553d3..b1b903a0dac8 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -178,7 +178,7 @@ typedef u8 hwif_chipset_t; /* * Structure to hold all information about the location of this port */ -typedef struct hw_regs_s { +struct ide_hw { union { struct ide_io_ports io_ports; unsigned long io_ports_array[IDE_NR_PORTS]; @@ -188,9 +188,9 @@ typedef struct hw_regs_s { ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ struct device *dev, *parent; unsigned long config; -} hw_regs_t; +}; -static inline void ide_std_init_ports(hw_regs_t *hw, +static inline void ide_std_init_ports(struct ide_hw *hw, unsigned long io_addr, unsigned long ctl_addr) { @@ -1212,7 +1212,7 @@ static inline int ide_pci_is_in_compatibility_mode(struct pci_dev *dev) } void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *, - hw_regs_t *, hw_regs_t **); + struct ide_hw *, struct ide_hw **); void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *); #ifdef CONFIG_BLK_DEV_IDEDMA_PCI @@ -1456,12 +1456,12 @@ void ide_undecoded_slave(ide_drive_t *); void ide_port_apply_params(ide_hwif_t *); int ide_sysfs_register_port(ide_hwif_t *); -struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **, +struct ide_host *ide_host_alloc(const struct ide_port_info *, struct ide_hw **, unsigned int); void ide_host_free(struct ide_host *); int ide_host_register(struct ide_host *, const struct ide_port_info *, - hw_regs_t **); -int ide_host_add(const struct ide_port_info *, hw_regs_t **, unsigned int, + struct ide_hw **); +int ide_host_add(const struct ide_port_info *, struct ide_hw **, unsigned int, struct ide_host **); void ide_host_remove(struct ide_host *); int ide_legacy_device_add(const struct ide_port_info *, unsigned long); -- cgit v1.2.3 From b83674c0da6558e357c6b482ccf299eeea77d8ef Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sun, 17 May 2009 01:02:03 -0400 Subject: reiserfs: fixup perms when xattrs are disabled This adds CONFIG_REISERFS_FS_XATTR protection from reiserfs_permission. This is needed to avoid warnings during file deletions and chowns with xattrs disabled. Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- include/linux/reiserfs_xattr.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index cdedc01036e4..99928dce37ea 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -41,6 +41,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags); int reiserfs_lookup_privroot(struct super_block *sb); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); +int reiserfs_permission(struct inode *inode, int mask); #ifdef CONFIG_REISERFS_FS_XATTR #define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) @@ -50,7 +51,6 @@ int reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size); int reiserfs_removexattr(struct dentry *dentry, const char *name); -int reiserfs_permission(struct inode *inode, int mask); int reiserfs_xattr_get(struct inode *, const char *, void *, size_t); int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int); @@ -117,8 +117,6 @@ static inline void reiserfs_init_xattr_rwsem(struct inode *inode) #define reiserfs_listxattr NULL #define reiserfs_removexattr NULL -#define reiserfs_permission NULL - static inline void reiserfs_init_xattr_rwsem(struct inode *inode) { } -- cgit v1.2.3 From 888a589f6be07d624e21e2174d98375e9f95911b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 May 2009 13:59:37 -0700 Subject: mm, x86: remove MEMORY_HOTPLUG_RESERVE related code after: | commit b263295dbffd33b0fbff670720fa178c30e3392a | Author: Christoph Lameter | Date: Wed Jan 30 13:30:47 2008 +0100 | | x86: 64-bit, make sparsemem vmemmap the only memory model we don't have MEMORY_HOTPLUG_RESERVE anymore. Historically, x86-64 had an architecture-specific method for memory hotplug whereby it scanned the SRAT for physical memory ranges that could be potentially used for memory hot-add later. By reserving those ranges without physical memory, the memmap would be allocated and left dormant until needed. This depended on the DISCONTIG memory model which has been removed so the code implementing HOTPLUG_RESERVE is now dead. This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE. (Changelog authored by Mel.) v2: updated changelog, and remove hotadd= in doc [ Impact: remove dead code ] Signed-off-by: Yinghai Lu Reviewed-by: Christoph Lameter Reviewed-by: Mel Gorman Workflow-found-OK-by: Andrew Morton LKML-Reference: <4A0C4910.7090508@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index bff1f0d475c7..511b09867096 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1031,8 +1031,6 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); extern void remove_all_active_ranges(void); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); -- cgit v1.2.3 From eb33575cf67d3f35fa2510210ef92631266e2465 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 13 May 2009 17:34:48 +0100 Subject: [ARM] Double check memmap is actually valid with a memmap has unexpected holes V2 pfn_valid() is meant to be able to tell if a given PFN has valid memmap associated with it or not. In FLATMEM, it is expected that holes always have valid memmap as long as there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed that a valid section has a memmap for the entire section. However, ARM and maybe other embedded architectures in the future free memmap backing holes to save memory on the assumption the memmap is never used. The page_zone linkages are then broken even though pfn_valid() returns true. A walker of the full memmap must then do this additional check to ensure the memmap they are looking at is sane by making sure the zone and PFN linkages are still valid. This is expensive, but walkers of the full memmap are extremely rare. This was caught before for FLATMEM and hacked around but it hits again for SPARSEMEM because the page_zone linkages can look ok where the PFN linkages are totally screwed. This looks like a hatchet job but the reality is that any clean solution would end up consumning all the memory saved by punching these unexpected holes in the memmap. For example, we tried marking the memmap within the section invalid but the section size exceeds the size of the hole in most cases so pfn_valid() starts returning false where valid memmap exists. Shrinking the size of the section would increase memory consumption offsetting the gains. This patch identifies when an architecture is punching unexpected holes in the memmap that the memory model cannot automatically detect and sets ARCH_HAS_HOLES_MEMORYMODEL. At the moment, this is restricted to EP93xx which is the model sub-architecture this has been reported on but may expand later. When set, walkers of the full memmap must call memmap_valid_within() for each PFN and passing in what it expects the page and zone to be for that PFN. If it finds the linkages to be broken, it assumes the memmap is invalid for that PFN. Signed-off-by: Mel Gorman Signed-off-by: Russell King --- include/linux/mmzone.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 186ec6ab334d..a47c879e1304 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1097,6 +1097,32 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); #define pfn_valid_within(pfn) (1) #endif +#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL +/* + * pfn_valid() is meant to be able to tell if a given PFN has valid memmap + * associated with it or not. In FLATMEM, it is expected that holes always + * have valid memmap as long as there is valid PFNs either side of the hole. + * In SPARSEMEM, it is assumed that a valid section has a memmap for the + * entire section. + * + * However, an ARM, and maybe other embedded architectures in the future + * free memmap backing holes to save memory on the assumption the memmap is + * never used. The page_zone linkages are then broken even though pfn_valid() + * returns true. A walker of the full memmap must then do this additional + * check to ensure the memmap they are looking at is sane by making sure + * the zone and PFN linkages are still valid. This is expensive, but walkers + * of the full memmap are extremely rare. + */ +int memmap_valid_within(unsigned long pfn, + struct page *page, struct zone *zone); +#else +static inline int memmap_valid_within(unsigned long pfn, + struct page *page, struct zone *zone) +{ + return 1; +} +#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ + #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */ -- cgit v1.2.3 From 1cde26f928863d90e9e7c1217880c8450464d305 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 18 May 2009 14:41:30 +0200 Subject: virtio_blk: SG_IO passthru support Add support for SG_IO passthru to virtio_blk. We add the scsi command block after the normal outhdr, and the scsi inhdr with full status information aswell as the sense buffer before the regular inhdr. [hch: forward ported, added the VIRTIO_BLK_F_SCSI flags, some comments and tested the whole beast] [axboe: updated to use ->resid and not dual-path the byte count] Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Rusty Russell (+ checkpatch.pl tweak) Signed-off-by: Jens Axboe --- include/linux/virtio_blk.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 94c56d29869d..4dbcbc1c3481 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -15,6 +15,7 @@ #define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ +#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ struct virtio_blk_config { @@ -55,6 +56,13 @@ struct virtio_blk_outhdr __u64 sector; }; +struct virtio_scsi_inhdr { + __u32 errors; + __u32 data_len; + __u32 sense_len; + __u32 residual; +}; + /* And this is the final byte of the write scatter-gather list. */ #define VIRTIO_BLK_S_OK 0 #define VIRTIO_BLK_S_IOERR 1 -- cgit v1.2.3 From 6d3ddc81f5762d54ce7d1db70eb757c6c12fabbc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 16 May 2009 17:47:29 +0100 Subject: ASoC: Split DAPM power checks from sequencing of power changes DAPM has always applied any changes to the power state of widgets as soon as it has determined that they are required. Instead of doing this store all the changes that are required on lists of widgets to power up and down, then iterate over those lists and apply the changes. This changes the sequence in which changes are implemented, doing all power downs before power ups and always using the up/down sequences (previously they were only used when changes were due to DAC/ADC power events). The error handling is also changed so that we continue attempting to power widgets if some changes fail. The main benefit of this is to allow future changes to do optimisations over the whole power sequence and to reduce the number of walks of the widget graph required to check the power status of widgets. Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 3 +++ include/sound/soc.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 533f9f256496..b3f789d0cee8 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -385,6 +385,9 @@ struct snd_soc_dapm_widget { /* widget input and outputs */ struct list_head sources; struct list_head sinks; + + /* used during DAPM updates */ + struct list_head power_list; }; #endif diff --git a/include/sound/soc.h b/include/sound/soc.h index 6ab80bf7abd2..8309ce81cf3b 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -372,6 +372,8 @@ struct snd_soc_codec { enum snd_soc_bias_level bias_level; enum snd_soc_bias_level suspend_bias_level; struct delayed_work delayed_work; + struct list_head up_list; + struct list_head down_list; /* codec DAI's */ struct snd_soc_dai *dai; -- cgit v1.2.3 From 452c5eaa0d5162e02ffee742ea17540887bc2904 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 17 May 2009 21:41:23 +0100 Subject: ASoC: Integrate bias management with DAPM power management Rather than managing the bias level of the system based on if there is an active audio stream manage it based on there being an active DAPM widget. This simplifies the code a little, moving the power handling into one place, and improves audio performance for bypass paths when no playbacks or captures are active. Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 2 -- include/sound/soc.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index b3f789d0cee8..ec8a45f9a069 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -279,8 +279,6 @@ int snd_soc_dapm_add_routes(struct snd_soc_codec *codec, /* dapm events */ int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, char *stream, int event); -int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev, - enum snd_soc_bias_level level); /* dapm sys fs - used by the core */ int snd_soc_dapm_sys_add(struct device *dev); diff --git a/include/sound/soc.h b/include/sound/soc.h index 8309ce81cf3b..2af3213df90c 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -339,6 +339,7 @@ struct snd_soc_codec { struct module *owner; struct mutex mutex; struct device *dev; + struct snd_soc_device *socdev; struct list_head list; -- cgit v1.2.3 From bac9caf016bf147af7d3afbe7580a7f773cb1566 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 17 May 2009 18:18:58 -0700 Subject: asm-generic: fix local_add_unless macro `local_add_unless(x, y, z)' will be expanded to `(&(x)->y, (y), (x))', but `&(x)->y' should be `&(x)->a' Signed-off-by: Roel Kluin Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/local.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h index dbd6150763e9..fc218444e315 100644 --- a/include/asm-generic/local.h +++ b/include/asm-generic/local.h @@ -42,7 +42,7 @@ typedef struct #define local_cmpxchg(l, o, n) atomic_long_cmpxchg((&(l)->a), (o), (n)) #define local_xchg(l, n) atomic_long_xchg((&(l)->a), (n)) -#define local_add_unless(l, a, u) atomic_long_add_unless((&(l)->a), (a), (u)) +#define local_add_unless(l, _a, u) atomic_long_add_unless((&(l)->a), (_a), (u)) #define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a) /* Non-atomic variants, ie. preemption disabled and won't be touched -- cgit v1.2.3 From 75834fc3b6fcff00327f5d2a18760c1e8e0179c5 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 18 May 2009 10:26:10 -0400 Subject: SELinux: move SELINUX_MAGIC into magic.h The selinuxfs superblock magic is used inside the IMA code, but is being defined in two places and could someday get out of sync. This patch moves the declaration into magic.h so it is only done once. Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/magic.h b/include/linux/magic.h index 5b4e28bcb788..927138cf3050 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -9,6 +9,7 @@ #define DEBUGFS_MAGIC 0x64626720 #define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 +#define SELINUX_MAGIC 0xf97cff8c #define TMPFS_MAGIC 0x01021994 #define SQUASHFS_MAGIC 0x73717368 #define EFS_SUPER_MAGIC 0x414A53 -- cgit v1.2.3 From 4200efd9acda4accf24640f1e77d24fdcdb524df Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 19 May 2009 09:22:19 +0200 Subject: sched: properly define the sched_group::cpumask and sched_domain::span fields Properly document the variable-size structure tricks we are doing wrt. struct sched_group and sched_domain, and use the field[0] GCC extension instead of defining a vla array. Dont use unions for this, as pointed out by Linus. [ Impact: cleanup, un-confuse Sparse and LLVM ] Reported-by: Jeff Garzik Acked-by: Linus Torvalds LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/sched.h | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index de7b3b217772..dbb1043e8656 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -839,7 +839,17 @@ struct sched_group { */ u32 reciprocal_cpu_power; - unsigned long cpumask[]; + /* + * The CPUs this group covers. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + * + * It is also be embedded into static data structures at build + * time. (See 'struct static_sched_group' in kernel/sched.c) + */ + unsigned long cpumask[0]; }; static inline struct cpumask *sched_group_cpus(struct sched_group *sg) @@ -925,8 +935,17 @@ struct sched_domain { char *name; #endif - /* span of all CPUs in this domain */ - unsigned long span[]; + /* + * Span of all CPUs in this domain. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + * + * It is also be embedded into static data structures at build + * time. (See 'struct static_sched_domain' in kernel/sched.c) + */ + unsigned long span[0]; }; static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -- cgit v1.2.3 From 79eb63e9e5875b84341a3a05f8e6ae9cdb4bb6f6 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 17 May 2009 18:57:15 +0300 Subject: block: Add blk_make_request(), takes bio, returns a request New block API: given a struct bio allocates a new request. This is the parallel of generic_make_request for BLOCK_PC commands users. The passed bio may be a chained-bio. The bio is bounced if needed inside the call to this member. This is in the effort of un-exporting blk_rq_append_bio(). Signed-off-by: Boaz Harrosh CC: Jeff Garzik Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f9d60a78c08a..88a83e112c95 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -740,6 +740,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); +extern struct request *blk_make_request(struct request_queue *, struct bio *, + gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); -- cgit v1.2.3 From a411f4bbb89f1f08687b344064d6775bce1e4658 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 17 May 2009 19:00:01 +0300 Subject: block: Un-export blk_rq_append_bio OSD was the last in-tree user of blk_rq_append_bio(). Now that it is fixed blk_rq_append_bio is un-exported and is only used internally by block layer. Signed-off-by: Boaz Harrosh Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 88a83e112c95..564445be7a6d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -757,12 +757,6 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -/* - * Temporary export, until SCSI gets fixed up. - */ -extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, - struct bio *bio); - /* * A queue has just exitted congestion. Note this in the global counter of * congested queues, and wake up anyone who was waiting for requests to be -- cgit v1.2.3 From 8e7d2b2c6ecd3c21a54b877eae3d5be48292e6b5 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 8 May 2009 16:13:25 -0700 Subject: drm/i915: allocate large pointer arrays with vmalloc For awhile now, many of the GEM code paths have allocated page or object arrays with the slab allocator. This is nice and fast, but won't work well if memory is fragmented, since the slab allocator works with physically contiguous memory (i.e. order > 2 allocations are likely to fail fairly early after booting and doing some work). This patch works around the issue by falling back to vmalloc for >PAGE_SIZE allocations. This is ugly, but much less work than chaining a bunch of pages together by hand (suprisingly there's not a bunch of generic kernel helpers for this yet afaik). vmalloc space is somewhat precious on 32 bit kernels, but our allocations shouldn't be big enough to cause problems, though they're routinely more than a page. Note that this patch doesn't address the unchecked alloc-based-on-ioctl-args in GEM; that needs to be fixed in a separate patch. Also, I've deliberately ignored the DRM's "area" junk. I don't think anyone actually uses it anymore and I'm hoping it gets ripped out soon. [Updated: removed size arg to new free function. We could unify the free functions as well once the DRM mem tracking is ripped out.] fd.o bug #20152 (part 1/3) Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt --- include/drm/drmP.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index c8c422151431..b84d8ae35e6f 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1519,6 +1519,30 @@ static __inline__ void *drm_calloc(size_t nmemb, size_t size, int area) { return kcalloc(nmemb, size, GFP_KERNEL); } + +static __inline__ void *drm_calloc_large(size_t nmemb, size_t size) +{ + u8 *addr; + + if (size <= PAGE_SIZE) + return kcalloc(nmemb, size, GFP_KERNEL); + + addr = vmalloc(nmemb * size); + if (!addr) + return NULL; + + memset(addr, 0, nmemb * size); + + return addr; +} + +static __inline void drm_free_large(void *ptr) +{ + if (!is_vmalloc_addr(ptr)) + return kfree(ptr); + + vfree(ptr); +} #else extern void *drm_alloc(size_t size, int area); extern void drm_free(void *pt, size_t size, int area); -- cgit v1.2.3 From c44d70a340554a33071339064a303ac0f1a31623 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 17 May 2009 11:24:08 +0200 Subject: perf_counter: fix counter inheritance race Context rotation should not occur when we are in the middle of walking the counter list when inheriting counters ... [ Impact: fix occasionally incorrect perf stat results ] Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: Marcelo Tosatti Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c8c1dfc22c93..13cb2fbbf334 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -508,6 +508,7 @@ struct perf_counter_context { int nr_counters; int nr_active; int is_active; + int rr_allowed; struct task_struct *task; /* -- cgit v1.2.3 From 0a7ae2ff0d29bb3b327edff4c8ab67b3834fa811 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 May 2009 08:54:31 +0200 Subject: block: change the tag sync vs async restriction logic Make them fully share the tag space, but disallow async requests using the last any two slots. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 564445be7a6d..a967dd775dbd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -404,7 +404,7 @@ struct request_queue struct list_head tag_busy_list; unsigned int nr_sorted; - unsigned int in_flight; + unsigned int in_flight[2]; unsigned int rq_timeout; struct timer_list timeout; @@ -511,6 +511,11 @@ static inline void queue_flag_clear_unlocked(unsigned int flag, __clear_bit(flag, &q->queue_flags); } +static inline int queue_in_flight(struct request_queue *q) +{ + return q->in_flight[0] + q->in_flight[1]; +} + static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) { WARN_ON_ONCE(!queue_is_locked(q)); -- cgit v1.2.3 From d7b629a34fc4134a43c730b5f0197855dc4948d0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 May 2009 12:21:19 +0200 Subject: perf_counter: Solve the rotate_ctx vs inherit race differently Instead of disabling RR scheduling of the counters, use a different list that does not get rotated to iterate the counters on inheritance. [ Impact: cleanup, optimization ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Marcelo Tosatti Cc: John Kacur LKML-Reference: <20090520102553.237504544@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 13cb2fbbf334..c8c1dfc22c93 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -508,7 +508,6 @@ struct perf_counter_context { int nr_counters; int nr_active; int is_active; - int rr_allowed; struct task_struct *task; /* -- cgit v1.2.3 From 26b119bc811a73bac6ecf95bdf284bf31c7955f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 May 2009 12:21:20 +0200 Subject: perf_counter: Log irq_period changes For the dynamic irq_period code, log whenever we change the period so that analyzing code can normalize the event flow. [ Impact: add new feature to allow more precise profiling ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Marcelo Tosatti Cc: John Kacur LKML-Reference: <20090520102553.298769743@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c8c1dfc22c93..f612941ef46e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -257,6 +257,14 @@ enum perf_event_type { */ PERF_EVENT_COMM = 3, + /* + * struct { + * struct perf_event_header header; + * u64 irq_period; + * }; + */ + PERF_EVENT_PERIOD = 4, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* -- cgit v1.2.3 From 89f536ccfa8b370ff4d054f4061858ca9322c25a Mon Sep 17 00:00:00 2001 From: Stephane Chatty Date: Wed, 20 May 2009 15:41:24 +0200 Subject: HID: add new multitouch and digitizer contants Added constants to hid.h for all digitizer usages (including the new multitouch ones that are not yet in the official USB spec but are being pushed by Microsft as described in their paper "Digitizer Drivers for Windows Touch and Pen-Based Computers"). Updated hid-debug.c to support the new MT input constants such as ABS_MT_POSITION_X. Signed-off-by: Stephane Chatty Signed-off-by: Jiri Kosina --- include/linux/hid.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index a72876e43589..53489fd4d700 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -238,6 +238,42 @@ struct hid_item { #define HID_GD_RIGHT 0x00010092 #define HID_GD_LEFT 0x00010093 +#define HID_DG_DIGITIZER 0x000d0001 +#define HID_DG_PEN 0x000d0002 +#define HID_DG_LIGHTPEN 0x000d0003 +#define HID_DG_TOUCHSCREEN 0x000d0004 +#define HID_DG_TOUCHPAD 0x000d0005 +#define HID_DG_STYLUS 0x000d0020 +#define HID_DG_PUCK 0x000d0021 +#define HID_DG_FINGER 0x000d0022 +#define HID_DG_TIPPRESSURE 0x000d0030 +#define HID_DG_BARRELPRESSURE 0x000d0031 +#define HID_DG_INRANGE 0x000d0032 +#define HID_DG_TOUCH 0x000d0033 +#define HID_DG_UNTOUCH 0x000d0034 +#define HID_DG_TAP 0x000d0035 +#define HID_DG_TABLETFUNCTIONKEY 0x000d0039 +#define HID_DG_PROGRAMCHANGEKEY 0x000d003a +#define HID_DG_INVERT 0x000d003c +#define HID_DG_TIPSWITCH 0x000d0042 +#define HID_DG_TIPSWITCH2 0x000d0043 +#define HID_DG_BARRELSWITCH 0x000d0044 +#define HID_DG_ERASER 0x000d0045 +#define HID_DG_TABLETPICK 0x000d0046 +/* + * as of May 20, 2009 the usages below are not yet in the official USB spec + * but are being pushed by Microsft as described in their paper "Digitizer + * Drivers for Windows Touch and Pen-Based Computers" + */ +#define HID_DG_CONFIDENCE 0x000d0047 +#define HID_DG_WIDTH 0x000d0048 +#define HID_DG_HEIGHT 0x000d0049 +#define HID_DG_CONTACTID 0x000d0051 +#define HID_DG_INPUTMODE 0x000d0052 +#define HID_DG_DEVICEINDEX 0x000d0053 +#define HID_DG_CONTACTCOUNT 0x000d0054 +#define HID_DG_CONTACTMAX 0x000d0055 + /* * HID report types --- Ouch! HID spec says 1 2 3! */ -- cgit v1.2.3 From 03fbdb15c14e9746c63168e3ff2c64b9c8336d33 Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Wed, 20 May 2009 22:39:08 +0100 Subject: [ARM] 5519/1: amba probe: pass "struct amba_id *" instead of void * The second argument of the probe method points to the amba_id structure, so it's better passed with the correct type. None of the current in-tree drivers uses the pointer, so they have only been checked for a clean compile. Change suggested by Russell King. Signed-off-by: Alessandro Rubini Signed-off-by: Russell King --- include/linux/amba/bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 51e6e54b2aa1..9b93cafa82a0 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -28,7 +28,7 @@ struct amba_id { struct amba_driver { struct device_driver drv; - int (*probe)(struct amba_device *, void *); + int (*probe)(struct amba_device *, struct amba_id *); int (*remove)(struct amba_device *); void (*shutdown)(struct amba_device *); int (*suspend)(struct amba_device *, pm_message_t); -- cgit v1.2.3 From b9fc745db833bbf74b4988493b8cd902a84c9415 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 19 May 2009 13:25:57 -0400 Subject: integrity: path_check update - Add support in ima_path_check() for integrity checking without incrementing the counts. (Required for nfsd.) - rename and export opencount_get to ima_counts_get - replace ima_shm_check calls with ima_counts_get - export ima_path_check Signed-off-by: Mimi Zohar Signed-off-by: James Morris --- include/linux/ima.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ima.h b/include/linux/ima.h index 0e2aa45cb0ce..b1b827d091a9 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -13,14 +13,17 @@ #include struct linux_binprm; +#define IMA_COUNT_UPDATE 1 +#define IMA_COUNT_LEAVE 0 + #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_inode_alloc(struct inode *inode); extern void ima_inode_free(struct inode *inode); -extern int ima_path_check(struct path *path, int mask); +extern int ima_path_check(struct path *path, int mask, int update_counts); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); -extern void ima_shm_check(struct file *file); +extern void ima_counts_get(struct file *file); #else static inline int ima_bprm_check(struct linux_binprm *bprm) @@ -38,7 +41,7 @@ static inline void ima_inode_free(struct inode *inode) return; } -static inline int ima_path_check(struct path *path, int mask) +static inline int ima_path_check(struct path *path, int mask, int update_counts) { return 0; } @@ -53,7 +56,7 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot) return 0; } -static inline void ima_shm_check(struct file *file) +static inline void ima_counts_get(struct file *file) { return; } -- cgit v1.2.3 From 5c82f56736e4c3a9eaf53c94366b056c8622d79e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 May 2009 09:41:30 +0100 Subject: AsoC: Make snd_soc_read() and snd_soc_write() functions Should be no impact on the generated code but it helps the compiler print clearer messages. Signed-off-by: Mark Brown --- include/sound/soc.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 2af3213df90c..cf6111d72b17 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -214,10 +214,6 @@ void snd_soc_jack_free_gpios(struct snd_soc_jack *jack, int count, struct snd_soc_jack_gpio *gpios); #endif -/* codec IO */ -#define snd_soc_read(codec, reg) codec->read(codec, reg) -#define snd_soc_write(codec, reg, value) codec->write(codec, reg, value) - /* codec register bit access */ int snd_soc_update_bits(struct snd_soc_codec *codec, unsigned short reg, unsigned short mask, unsigned short value); @@ -507,6 +503,19 @@ struct soc_enum { void *dapm; }; +/* codec IO */ +static inline unsigned int snd_soc_read(struct snd_soc_codec *codec, + unsigned int reg) +{ + return codec->read(codec, reg); +} + +static inline unsigned int snd_soc_write(struct snd_soc_codec *codec, + unsigned int reg, unsigned int val) +{ + return codec->write(codec, reg, val); +} + #include #endif -- cgit v1.2.3 From a63eaf34ae60bdb067a354cc8def2e8f4a01f5f4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 22 May 2009 14:17:31 +1000 Subject: perf_counter: Dynamically allocate tasks' perf_counter_context struct This replaces the struct perf_counter_context in the task_struct with a pointer to a dynamically allocated perf_counter_context struct. The main reason for doing is this is to allow us to transfer a perf_counter_context from one task to another when we do lazy PMU switching in a later patch. This has a few side-benefits: the task_struct becomes a little smaller, we save some memory because only tasks that have perf_counters attached get a perf_counter_context allocated for them, and we can remove the inclusion of in sched.h, meaning that we don't end up recompiling nearly everything whenever perf_counter.h changes. The perf_counter_context structures are reference-counted and freed when the last reference is dropped. A context can have references from its task and the counters on its task. Counters can outlive the task so it is possible that a context will be freed well after its task has exited. Contexts are allocated on fork if the parent had a context, or otherwise the first time that a per-task counter is created on a task. In the latter case, we set the context pointer in the task struct locklessly using an atomic compare-and-exchange operation in case we raced with some other task in creating a context for the subject task. This also removes the task pointer from the perf_counter struct. The task pointer was not used anywhere and would make it harder to move a context from one task to another. Anything that needed to know which task a counter was attached to was already using counter->ctx->task. The __perf_counter_init_context function moves up in perf_counter.c so that it can be called from find_get_context, and now initializes the refcount, but is otherwise unchanged. We were potentially calling list_del_counter twice: once from __perf_counter_exit_task when the task exits and once from __perf_counter_remove_from_context when the counter's fd gets closed. This adds a check in list_del_counter so it doesn't do anything if the counter has already been removed from the lists. Since perf_counter_task_sched_in doesn't do anything if the task doesn't have a context, and leaves cpuctx->task_ctx = NULL, this adds code to __perf_install_in_context to set cpuctx->task_ctx if necessary, i.e. in the case where the current task adds the first counter to itself and thus creates a context for itself. This also adds similar code to __perf_counter_enable to handle a similar situation which can arise when the counters have been disabled using prctl; that also leaves cpuctx->task_ctx = NULL. [ Impact: refactor counter context management to prepare for new feature ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: <18966.10075.781053.231153@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 13 ------------- include/linux/perf_counter.h | 4 +--- include/linux/sched.h | 6 ++++-- 3 files changed, 5 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 503afaa0afa7..d87247d2641f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -108,18 +108,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -#ifdef CONFIG_PERF_COUNTERS -# define INIT_PERF_COUNTERS(tsk) \ - .perf_counter_ctx.counter_list = \ - LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ - .perf_counter_ctx.event_list = \ - LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \ - .perf_counter_ctx.lock = \ - __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), -#else -# define INIT_PERF_COUNTERS(tsk) -#endif - /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -183,7 +171,6 @@ extern struct cred init_cred; }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ - INIT_PERF_COUNTERS(tsk) \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f612941ef46e..071309005468 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -449,7 +449,6 @@ struct perf_counter { struct hw_perf_counter hw; struct perf_counter_context *ctx; - struct task_struct *task; struct file *filp; struct perf_counter *parent; @@ -498,7 +497,6 @@ struct perf_counter { * Used as a container for task counters and CPU counters as well: */ struct perf_counter_context { -#ifdef CONFIG_PERF_COUNTERS /* * Protect the states of the counters in the list, * nr_active, and the list: @@ -516,6 +514,7 @@ struct perf_counter_context { int nr_counters; int nr_active; int is_active; + atomic_t refcount; struct task_struct *task; /* @@ -523,7 +522,6 @@ struct perf_counter_context { */ u64 time; u64 timestamp; -#endif }; /** diff --git a/include/linux/sched.h b/include/linux/sched.h index ff59d1231519..9714d450f417 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,7 +71,6 @@ struct sched_param { #include #include #include -#include #include #include #include @@ -99,6 +98,7 @@ struct robust_list_head; struct bio; struct bts_tracer; struct fs_struct; +struct perf_counter_context; /* * List of flags we want to share for kernel threads, @@ -1387,7 +1387,9 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif - struct perf_counter_context perf_counter_ctx; +#ifdef CONFIG_PERF_COUNTERS + struct perf_counter_context *perf_counter_ctxp; +#endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; short il_next; -- cgit v1.2.3 From 564c2b210add41df9a3a5aaa365c1d97cff6110d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 22 May 2009 14:27:22 +1000 Subject: perf_counter: Optimize context switch between identical inherited contexts When monitoring a process and its descendants with a set of inherited counters, we can often get the situation in a context switch where both the old (outgoing) and new (incoming) process have the same set of counters, and their values are ultimately going to be added together. In that situation it doesn't matter which set of counters are used to count the activity for the new process, so there is really no need to go through the process of reading the hardware counters and updating the old task's counters and then setting up the PMU for the new task. This optimizes the context switch in this situation. Instead of scheduling out the perf_counter_context for the old task and scheduling in the new context, we simply transfer the old context to the new task and keep using it without interruption. The new context gets transferred to the old task. This means that both tasks still have a valid perf_counter_context, so no special case is introduced when the old task gets scheduled in again, either on this CPU or another CPU. The equivalence of contexts is detected by keeping a pointer in each cloned context pointing to the context it was cloned from. To cope with the situation where a context is changed by adding or removing counters after it has been cloned, we also keep a generation number on each context which is incremented every time a context is changed. When a context is cloned we take a copy of the parent's generation number, and two cloned contexts are equivalent only if they have the same parent and the same generation number. In order that the parent context pointer remains valid (and is not reused), we increment the parent context's reference count for each context cloned from it. Since we don't have individual fds for the counters in a cloned context, the only thing that can make two clones of a given parent different after they have been cloned is enabling or disabling all counters with prctl. To account for this, we keep a count of the number of enabled counters in each context. Two contexts must have the same number of enabled counters to be considered equivalent. Here are some measurements of the context switch time as measured with the lat_ctx benchmark from lmbench, comparing the times obtained with and without this patch series: -----Unmodified----- With this patch series Counters: none 2 HW 4H+4S none 2 HW 4H+4S 2 processes: Average 3.44 6.45 11.24 3.12 3.39 3.60 St dev 0.04 0.04 0.13 0.05 0.17 0.19 8 processes: Average 6.45 8.79 14.00 5.57 6.23 7.57 St dev 1.27 1.04 0.88 1.42 1.46 1.42 32 processes: Average 5.56 8.43 13.78 5.28 5.55 7.15 St dev 0.41 0.47 0.53 0.54 0.57 0.81 The numbers are the mean and standard deviation of 20 runs of lat_ctx. The "none" columns are lat_ctx run directly without any counters. The "2 HW" columns are with lat_ctx run under perfstat, counting cycles and instructions. The "4H+4S" columns are lat_ctx run under perfstat with 4 hardware counters and 4 software counters (cycles, instructions, cache references, cache misses, task clock, context switch, cpu migrations, and page faults). [ Impact: performance optimization of counter context-switches ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: <18966.10666.517218.332164@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 071309005468..4cae01a50450 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -513,6 +513,7 @@ struct perf_counter_context { struct list_head event_list; int nr_counters; int nr_active; + int nr_enabled; int is_active; atomic_t refcount; struct task_struct *task; @@ -522,6 +523,14 @@ struct perf_counter_context { */ u64 time; u64 timestamp; + + /* + * These fields let us detect when two contexts have both + * been cloned (inherited) from a common ancestor. + */ + struct perf_counter_context *parent_ctx; + u32 parent_gen; + u32 generation; }; /** @@ -552,7 +561,8 @@ extern int perf_max_counters; extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); -extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); +extern void perf_counter_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); -- cgit v1.2.3 From 910431c7f2e963017d767b29c80ae706421e569f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 22 May 2009 12:32:15 +0200 Subject: perf_counter: fix !PERF_COUNTERS build failure Update the !CONFIG_PERF_COUNTERS prototype too, for perf_counter_task_sched_out(). [ Impact: build fix ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: <18966.10666.517218.332164@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4cae01a50450..2eedae8498d3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -625,7 +625,8 @@ extern void perf_counter_init(void); static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } static inline void -perf_counter_task_sched_out(struct task_struct *task, int cpu) { } +perf_counter_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu) { } static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *child) { } -- cgit v1.2.3 From 86ed3669f068b514ab85ffd548456a342b3fb8d3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 May 2009 15:01:19 +0100 Subject: ASoC: WM9081 mono DAC with integrated 2.6W class AB/D amplifier driver The WM9081 is designed to provide high power output at low distortion levels in space-constrained portable applications. Signed-off-by: Mark Brown --- include/sound/wm9081.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/sound/wm9081.h (limited to 'include') diff --git a/include/sound/wm9081.h b/include/sound/wm9081.h new file mode 100644 index 000000000000..e173ddbf6bd4 --- /dev/null +++ b/include/sound/wm9081.h @@ -0,0 +1,25 @@ +/* + * linux/sound/wm9081.h -- Platform data for WM9081 + * + * Copyright 2009 Wolfson Microelectronics. PLC. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_SND_WM_9081_H +#define __LINUX_SND_WM_9081_H + +struct wm9081_retune_mobile_setting { + const char *name; + unsigned int rate; + u16 config[20]; +}; + +struct wm9081_retune_mobile_config { + struct wm9081_retune_mobile_setting *configs; + int num_configs; +}; + +#endif -- cgit v1.2.3 From 28ee9bc5cc42776e0364399b401a64906ac1ac8e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 22 May 2009 16:23:38 +0200 Subject: ide: report timeouts in ide_busy_sleep() * change 'hwif' argument to 'drive' * report an error on timeout Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index ff65fffb078f..9fed365a598b 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1109,7 +1109,7 @@ void ide_fix_driveid(u16 *); extern void ide_fixstring(u8 *, const int, const int); -int ide_busy_sleep(ide_hwif_t *, unsigned long, int); +int ide_busy_sleep(ide_drive_t *, unsigned long, int); int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); -- cgit v1.2.3 From 5993856e53fbc4b4f28e2d481deaebeb715b1267 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Fri, 22 May 2009 16:23:39 +0200 Subject: via82cxxx: Add VIA VX855 PCI Device ID This patch adds the PCI Device ID 0xc409 to the PCI ID table of via82cxxx.c, as well as the 0x8409 south bridge ID. This is required to make the IDE driver work on the VX855/VX875 integrated chipset. Signed-off-by: Harald Welte Cc: Joseph Chan Cc: Bruce Chang Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/pci_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 06ba90c211a5..0f71812d67d3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1406,7 +1406,7 @@ #define PCI_DEVICE_ID_VIA_82C598_1 0x8598 #define PCI_DEVICE_ID_VIA_838X_1 0xB188 #define PCI_DEVICE_ID_VIA_83_87XX_1 0xB198 -#define PCI_DEVICE_ID_VIA_C409_IDE 0XC409 +#define PCI_DEVICE_ID_VIA_VX855_IDE 0xC409 #define PCI_DEVICE_ID_VIA_ANON 0xFFFF #define PCI_VENDOR_ID_SIEMENS 0x110A -- cgit v1.2.3 From e1defc4ff0cf57aca6c5e3ff99fa503f5943c1f1 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:49 -0400 Subject: block: Do away with the notion of hardsect_size Until now we have had a 1:1 mapping between storage device physical block size and the logical block sized used when addressing the device. With SATA 4KB drives coming out that will no longer be the case. The sector size will be 4KB but the logical block size will remain 512-bytes. Hence we need to distinguish between the physical block size and the logical ditto. This patch renames hardsect_size to logical_block_size. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 14 +++++++------- include/linux/device-mapper.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 56ce53fce72e..872b78b7a101 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -391,7 +391,7 @@ struct request_queue unsigned int max_hw_sectors; unsigned short max_phys_segments; unsigned short max_hw_segments; - unsigned short hardsect_size; + unsigned short logical_block_size; unsigned int max_segment_size; unsigned long seg_boundary_mask; @@ -901,7 +901,7 @@ extern void blk_queue_max_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); -extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); +extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); @@ -988,19 +988,19 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) -static inline int queue_hardsect_size(struct request_queue *q) +static inline unsigned short queue_logical_block_size(struct request_queue *q) { int retval = 512; - if (q && q->hardsect_size) - retval = q->hardsect_size; + if (q && q->logical_block_size) + retval = q->logical_block_size; return retval; } -static inline int bdev_hardsect_size(struct block_device *bdev) +static inline unsigned short bdev_logical_block_size(struct block_device *bdev) { - return queue_hardsect_size(bdev_get_queue(bdev)); + return queue_logical_block_size(bdev_get_queue(bdev)); } static inline int queue_dma_alignment(struct request_queue *q) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ded2d7c42668..49c2362977fd 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -149,7 +149,7 @@ struct io_restrictions { unsigned max_hw_sectors; unsigned max_sectors; unsigned max_segment_size; - unsigned short hardsect_size; + unsigned short logical_block_size; unsigned short max_hw_segments; unsigned short max_phys_segments; unsigned char no_cluster; /* inverted so that 0 is default */ -- cgit v1.2.3 From ae03bf639a5027d27270123f5f6e3ee6a412781d Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:50 -0400 Subject: block: Use accessor functions for queue limits Convert all external users of queue limits to using wrapper functions instead of poking the request queue variables directly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- include/linux/blkdev.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index d30ec6f30dd7..12737be58601 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -279,7 +279,7 @@ static inline int bio_has_allocated_vec(struct bio *bio) #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ (((addr1) | (mask)) == (((addr2) - 1) | (mask))) #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ - __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, (q)->seg_boundary_mask) + __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q))) #define BIO_SEG_BOUNDARY(q, b1, b2) \ BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2))) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 872b78b7a101..29b48f7b4ba8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -898,6 +898,7 @@ extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_max_sectors(struct request_queue *, unsigned int); +extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); @@ -988,6 +989,41 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) +static inline unsigned long queue_bounce_pfn(struct request_queue *q) +{ + return q->bounce_pfn; +} + +static inline unsigned long queue_segment_boundary(struct request_queue *q) +{ + return q->seg_boundary_mask; +} + +static inline unsigned int queue_max_sectors(struct request_queue *q) +{ + return q->max_sectors; +} + +static inline unsigned int queue_max_hw_sectors(struct request_queue *q) +{ + return q->max_hw_sectors; +} + +static inline unsigned short queue_max_hw_segments(struct request_queue *q) +{ + return q->max_hw_segments; +} + +static inline unsigned short queue_max_phys_segments(struct request_queue *q) +{ + return q->max_phys_segments; +} + +static inline unsigned int queue_max_segment_size(struct request_queue *q) +{ + return q->max_segment_size; +} + static inline unsigned short queue_logical_block_size(struct request_queue *q) { int retval = 512; -- cgit v1.2.3 From 025146e13b63483add912706c101fb0fb6f015cc Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:51 -0400 Subject: block: Move queue limits to an embedded struct To accommodate stacking drivers that do not have an associated request queue we're moving the limits to a separate, embedded structure. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 29b48f7b4ba8..b7bb6fdba12c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -307,6 +307,21 @@ struct blk_cmd_filter { struct kobject kobj; }; +struct queue_limits { + unsigned long bounce_pfn; + unsigned long seg_boundary_mask; + + unsigned int max_hw_sectors; + unsigned int max_sectors; + unsigned int max_segment_size; + + unsigned short logical_block_size; + unsigned short max_hw_segments; + unsigned short max_phys_segments; + + unsigned char no_cluster; +}; + struct request_queue { /* @@ -358,7 +373,6 @@ struct request_queue /* * queue needs bounce pages for pages above this limit */ - unsigned long bounce_pfn; gfp_t bounce_gfp; /* @@ -387,14 +401,6 @@ struct request_queue unsigned int nr_congestion_off; unsigned int nr_batching; - unsigned int max_sectors; - unsigned int max_hw_sectors; - unsigned short max_phys_segments; - unsigned short max_hw_segments; - unsigned short logical_block_size; - unsigned int max_segment_size; - - unsigned long seg_boundary_mask; void *dma_drain_buffer; unsigned int dma_drain_size; unsigned int dma_pad_mask; @@ -410,6 +416,8 @@ struct request_queue struct timer_list timeout; struct list_head timeout_list; + struct queue_limits limits; + /* * sg stuff */ @@ -991,45 +999,45 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); static inline unsigned long queue_bounce_pfn(struct request_queue *q) { - return q->bounce_pfn; + return q->limits.bounce_pfn; } static inline unsigned long queue_segment_boundary(struct request_queue *q) { - return q->seg_boundary_mask; + return q->limits.seg_boundary_mask; } static inline unsigned int queue_max_sectors(struct request_queue *q) { - return q->max_sectors; + return q->limits.max_sectors; } static inline unsigned int queue_max_hw_sectors(struct request_queue *q) { - return q->max_hw_sectors; + return q->limits.max_hw_sectors; } static inline unsigned short queue_max_hw_segments(struct request_queue *q) { - return q->max_hw_segments; + return q->limits.max_hw_segments; } static inline unsigned short queue_max_phys_segments(struct request_queue *q) { - return q->max_phys_segments; + return q->limits.max_phys_segments; } static inline unsigned int queue_max_segment_size(struct request_queue *q) { - return q->max_segment_size; + return q->limits.max_segment_size; } static inline unsigned short queue_logical_block_size(struct request_queue *q) { int retval = 512; - if (q && q->logical_block_size) - retval = q->logical_block_size; + if (q && q->limits.logical_block_size) + retval = q->limits.logical_block_size; return retval; } -- cgit v1.2.3 From c72758f33784e5e2a1a4bb9421ef3e6de8f9fcf3 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:53 -0400 Subject: block: Export I/O topology for block devices and partitions To support devices with physical block sizes bigger than 512 bytes we need to ensure proper alignment. This patch adds support for exposing I/O topology characteristics as devices are stacked. logical_block_size is the smallest unit the device can address. physical_block_size indicates the smallest I/O the device can write without incurring a read-modify-write penalty. The io_min parameter is the smallest preferred I/O size reported by the device. In many cases this is the same as the physical block size. However, the io_min parameter can be scaled up when stacking (RAID5 chunk size > physical block size). The io_opt characteristic indicates the optimal I/O size reported by the device. This is usually the stripe width for arrays. The alignment_offset parameter indicates the number of bytes the start of the device/partition is offset from the device's natural alignment. Partition tools and MD/DM utilities can use this to pad their offsets so filesystems start on proper boundaries. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/genhd.h | 1 + 2 files changed, 48 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b7bb6fdba12c..5e740a135e73 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -314,11 +314,16 @@ struct queue_limits { unsigned int max_hw_sectors; unsigned int max_sectors; unsigned int max_segment_size; + unsigned int physical_block_size; + unsigned int alignment_offset; + unsigned int io_min; + unsigned int io_opt; unsigned short logical_block_size; unsigned short max_hw_segments; unsigned short max_phys_segments; + unsigned char misaligned; unsigned char no_cluster; }; @@ -911,6 +916,15 @@ extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_alignment_offset(struct request_queue *q, + unsigned int alignment); +extern void blk_queue_io_min(struct request_queue *q, unsigned int min); +extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); +extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, + sector_t offset); +extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, + sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); @@ -1047,6 +1061,39 @@ static inline unsigned short bdev_logical_block_size(struct block_device *bdev) return queue_logical_block_size(bdev_get_queue(bdev)); } +static inline unsigned int queue_physical_block_size(struct request_queue *q) +{ + return q->limits.physical_block_size; +} + +static inline unsigned int queue_io_min(struct request_queue *q) +{ + return q->limits.io_min; +} + +static inline unsigned int queue_io_opt(struct request_queue *q) +{ + return q->limits.io_opt; +} + +static inline int queue_alignment_offset(struct request_queue *q) +{ + if (q && q->limits.misaligned) + return -1; + + if (q && q->limits.alignment_offset) + return q->limits.alignment_offset; + + return 0; +} + +static inline int queue_sector_alignment_offset(struct request_queue *q, + sector_t sector) +{ + return ((sector << 9) - q->limits.alignment_offset) + & (q->limits.io_min - 1); +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a1a28caed23d..149fda264c86 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -90,6 +90,7 @@ struct disk_stats { struct hd_struct { sector_t start_sect; sector_t nr_sects; + sector_t alignment_offset; struct device __dev; struct kobject *holder_dir; int policy, partno; -- cgit v1.2.3 From df391e0eda1e678add56a8e34226edf05d89af6a Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Sat, 23 May 2009 09:51:20 -0700 Subject: Input: multitouch - add tracking ID to the protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a few multi-touch devices that support finger tracking well in hardware, Stantum being the prime example. By exposing the tracking ID in the MT protocol, evdev bandwidth and cpu usage in user space can be reduced. This patch adds the ABS_MT_TRACKING_ID to the MT protocol. Signed-off-by: Henrik Rydberg Tested-by: Stéphane Chatty Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 0e6ff5de3588..6fed4f6a9c9e 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -656,6 +656,7 @@ struct input_absinfo { #define ABS_MT_POSITION_Y 0x36 /* Center Y ellipse position */ #define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ #define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ +#define ABS_MT_TRACKING_ID 0x39 /* Unique ID of initiated contact */ #define ABS_MAX 0x3f #define ABS_CNT (ABS_MAX+1) -- cgit v1.2.3 From e220d2dcb944c5c488b6855d15ec66d76900514f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 23 May 2009 18:28:55 +0200 Subject: perf_counter: Fix dynamic irq_period logging We call perf_adjust_freq() from perf_counter_task_tick() which is is called under the rq->lock causing lock recursion. However, it's no longer required to be called under the rq->lock, so remove it from under it. Also, fix up some related comments. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090523163012.476197912@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2eedae8498d3..23ddd29730f8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -260,6 +260,7 @@ enum perf_event_type { /* * struct { * struct perf_event_header header; + * u64 time; * u64 irq_period; * }; */ -- cgit v1.2.3 From fccc714b3148ab9741fafc1e90c3876d50df6093 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 23 May 2009 18:28:56 +0200 Subject: perf_counter: Sanitize counter->mutex s/counter->mutex/counter->child_mutex/ and make sure its only used to protect child_list. The usage in __perf_counter_exit_task() doesn't appear to be problematic since ctx->mutex also covers anything related to fd tear-down. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090523163012.533186528@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 23ddd29730f8..4ab8050eb9e8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -452,9 +452,6 @@ struct perf_counter { struct perf_counter_context *ctx; struct file *filp; - struct perf_counter *parent; - struct list_head child_list; - /* * These accumulate total time (in nanoseconds) that children * counters have been enabled and running, respectively. @@ -465,7 +462,9 @@ struct perf_counter { /* * Protect attach/detach and child_list: */ - struct mutex mutex; + struct mutex child_mutex; + struct list_head child_list; + struct perf_counter *parent; int oncpu; int cpu; -- cgit v1.2.3 From 10eb0f013c63c71c82ede77945a5f390c10cfda6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 13 May 2009 17:57:38 -0500 Subject: [SCSI] iscsi: pass ep connect shost When we create the tcp/ip connection by calling ep_connect, we currently just go by the routing table info. I think there are two problems with this. 1. Some drivers do not have access to a routing table. Some drivers like qla4xxx do not even know about other ports. 2. If you have two initiator ports on the same subnet, the user may have set things up so that session1 was supposed to be run through port1. and session2 was supposed to be run through port2. It looks like we could end with both sessions going through one of the ports. Fixes for cxgb3i from Karen Xie. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/iscsi_if.h | 7 ++++++- include/scsi/scsi_transport_iscsi.h | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h index d0ed5226f8c4..2c1a4af9eafb 100644 --- a/include/scsi/iscsi_if.h +++ b/include/scsi/iscsi_if.h @@ -50,7 +50,8 @@ enum iscsi_uevent_e { ISCSI_UEVENT_TGT_DSCVR = UEVENT_BASE + 15, ISCSI_UEVENT_SET_HOST_PARAM = UEVENT_BASE + 16, ISCSI_UEVENT_UNBIND_SESSION = UEVENT_BASE + 17, - ISCSI_UEVENT_CREATE_BOUND_SESSION = UEVENT_BASE + 18, + ISCSI_UEVENT_CREATE_BOUND_SESSION = UEVENT_BASE + 18, + ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST = UEVENT_BASE + 19, /* up events */ ISCSI_KEVENT_RECV_PDU = KEVENT_BASE + 1, @@ -131,6 +132,10 @@ struct iscsi_uevent { struct msg_transport_connect { uint32_t non_blocking; } ep_connect; + struct msg_transport_connect_through_host { + uint32_t host_no; + uint32_t non_blocking; + } ep_connect_through_host; struct msg_transport_poll { uint64_t ep_handle; uint32_t timeout_ms; diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 457588e1119b..8cb7a31d9961 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -126,7 +126,8 @@ struct iscsi_transport { int *index, int *age); void (*session_recovery_timedout) (struct iscsi_cls_session *session); - struct iscsi_endpoint *(*ep_connect) (struct sockaddr *dst_addr, + struct iscsi_endpoint *(*ep_connect) (struct Scsi_Host *shost, + struct sockaddr *dst_addr, int non_blocking); int (*ep_poll) (struct iscsi_endpoint *ep, int timeout_ms); void (*ep_disconnect) (struct iscsi_endpoint *ep); -- cgit v1.2.3 From 8f9256cea10ca43ac80f66e176643eb41db34244 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 13 May 2009 17:57:41 -0500 Subject: [SCSI] libiscsi: export iscsi_itt_to_task for bnx2i bnx2i needs to be able to look up mgmt task like login and nop, because it does some processing of them on the completion path. This exports iscsi_itt_to_task so it can look up the task. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/libiscsi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 0289f5745fb9..88d33a46efa5 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -406,6 +406,7 @@ extern int __iscsi_complete_pdu(struct iscsi_conn *, struct iscsi_hdr *, char *, int); extern int iscsi_verify_itt(struct iscsi_conn *, itt_t); extern struct iscsi_task *iscsi_itt_to_ctask(struct iscsi_conn *, itt_t); +extern struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *, itt_t); extern void iscsi_requeue_task(struct iscsi_task *task); extern void iscsi_put_task(struct iscsi_task *task); extern void __iscsi_get_task(struct iscsi_task *task); -- cgit v1.2.3 From 3bbaaad95fd38dedb7c66a601f14825b4e0c5a59 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 13 May 2009 17:57:46 -0500 Subject: [SCSI] libiscsi: handle cleanup task races bnx2i needs to send a hardware specific cleanup command if a command has not completed normally (iscsi/scsi response from target), and the session is still ok (this is the case when we send a TMF to stop the command). At this time it will need to drop the session lock. The problem with the current code is that fail_all_commands assumes we will hold the lock the entire time, so it uses list_for_each_entry_safe. If while bnx2i drops the session lock multiple cmds complete then list_for_each_entry_safe will not handle this correctly. This patch removes the running lists and just has us loop over the cmds array (in later patches we will then replace that array with a block tag map at the session level). It also fixes up the completion path so that if the TMF code and the normal recv path were completing the same command then they both do not try to do release the refcount taken when the task is queued. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/libiscsi.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 88d33a46efa5..facae71183a5 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -82,6 +82,7 @@ enum { enum { + ISCSI_TASK_FREE, ISCSI_TASK_COMPLETED, ISCSI_TASK_PENDING, ISCSI_TASK_RUNNING, @@ -181,9 +182,7 @@ struct iscsi_conn { /* xmit */ struct list_head mgmtqueue; /* mgmt (control) xmit queue */ - struct list_head mgmt_run_list; /* list of control tasks */ - struct list_head xmitqueue; /* data-path cmd queue */ - struct list_head run_list; /* list of cmds in progress */ + struct list_head cmdqueue; /* data-path cmd queue */ struct list_head requeue; /* tasks needing another run */ struct work_struct xmitwork; /* per-conn. xmit workqueue */ unsigned long suspend_tx; /* suspend Tx */ -- cgit v1.2.3 From b3cd5050bf8eb32ceecee129cac7c59e6f1668c4 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 13 May 2009 17:57:49 -0500 Subject: [SCSI] libiscsi: add task aborted state If a task did not complete normally due to a TMF, libiscsi will now complete the task with the state ISCSI_TASK_ABRT_TMF. Drivers like bnx2i that need to free resources if a command did not complete normally can then check the task state. If a driver does not need to send a special command if we have dropped the session then they can check for ISCSI_TASK_ABRT_SESS_RECOV. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/libiscsi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index facae71183a5..196525cd402f 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -86,6 +86,8 @@ enum { ISCSI_TASK_COMPLETED, ISCSI_TASK_PENDING, ISCSI_TASK_RUNNING, + ISCSI_TASK_ABRT_TMF, /* aborted due to TMF */ + ISCSI_TASK_ABRT_SESS_RECOV, /* aborted due to session recovery */ }; struct iscsi_r2t_info { -- cgit v1.2.3 From a366695592ebc9151dd5a248681270f0925d8324 Mon Sep 17 00:00:00 2001 From: Abhijeet Joglekar Date: Fri, 1 May 2009 10:01:26 -0700 Subject: [SCSI] libfc,fcoe,fnic: Separate rport and lport max retry counts This allows fnic to configure number of retries for lport and rport separately. Signed-off-by: Abhijeet Joglekar Acked-by: Robert Love Signed-off-by: James Bottomley --- include/scsi/libfc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 45f9cc642c46..ebdd9f4cf070 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -679,6 +679,7 @@ struct fc_lport { unsigned int e_d_tov; unsigned int r_a_tov; u8 max_retry_count; + u8 max_rport_retry_count; u16 link_speed; u16 link_supported_speeds; u16 lro_xid; /* max xid for fcoe lro */ -- cgit v1.2.3 From 082ff5a2767a0679ee543f14883adbafb631ffbe Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 23 May 2009 18:29:00 +0200 Subject: perf_counter: Change pctrl() behaviour Instead of en/dis-abling all counters acting on a particular task, en/dis- able all counters we created. [ v2: fix crash on first counter enable ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090523163012.916937244@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 10 ++++++++++ include/linux/perf_counter.h | 3 +++ include/linux/sched.h | 2 ++ 3 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d87247d2641f..353c0ac7723a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -108,6 +108,15 @@ extern struct group_info init_groups; extern struct cred init_cred; +#ifdef CONFIG_PERF_COUNTERS +# define INIT_PERF_COUNTERS(tsk) \ + .perf_counter_mutex = \ + __MUTEX_INITIALIZER(tsk.perf_counter_mutex), \ + .perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list), +#else +# define INIT_PERF_COUNTERS(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -171,6 +180,7 @@ extern struct cred init_cred; }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ + INIT_PERF_COUNTERS(tsk) \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4ab8050eb9e8..4159ee5940f8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -469,6 +469,9 @@ struct perf_counter { int oncpu; int cpu; + struct list_head owner_entry; + struct task_struct *owner; + /* mmap bits */ struct mutex mmap_mutex; atomic_t mmap_count; diff --git a/include/linux/sched.h b/include/linux/sched.h index 9714d450f417..bc9326dcdde1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1389,6 +1389,8 @@ struct task_struct { #endif #ifdef CONFIG_PERF_COUNTERS struct perf_counter_context *perf_counter_ctxp; + struct mutex perf_counter_mutex; + struct list_head perf_counter_list; #endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; -- cgit v1.2.3 From 475c55797323b67435083f6e2eb8ee670f6410ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 23 May 2009 18:29:01 +0200 Subject: perf_counter: Remove perf_counter_context::nr_enabled now that pctrl() no longer disables other people's counters, remove the PMU cache code that deals with that. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090523163013.032998331@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4159ee5940f8..2ddf5e3c5518 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -516,7 +516,6 @@ struct perf_counter_context { struct list_head event_list; int nr_counters; int nr_active; - int nr_enabled; int is_active; atomic_t refcount; struct task_struct *task; -- cgit v1.2.3 From e527ea312f31e88a7fa5472b71db71c565b0d44f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 May 2009 14:45:25 +0200 Subject: perf_counter: Remove unused ABI bits extra_config_len isn't used for anything, remove it. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090525124600.116035832@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2ddf5e3c5518..b1f2bac09f97 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -154,11 +154,11 @@ struct perf_counter_hw_event { __reserved_1 : 51; - __u32 extra_config_len; __u32 wakeup_events; /* wakeup every n events */ + __u32 __reserved_2; - __u64 __reserved_2; __u64 __reserved_3; + __u64 __reserved_4; }; /* -- cgit v1.2.3 From 6ab423e0eaca827fbd201ca4ae7d4f8573a366b2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 May 2009 14:45:27 +0200 Subject: perf_counter: Propagate inheritance failures down the fork() path Fail fork() when we fail inheritance for some reason (-ENOMEM most likely). Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090525124600.324656474@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index b1f2bac09f97..d3e85de9bf1e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -566,7 +566,7 @@ extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, struct task_struct *next, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); -extern void perf_counter_init_task(struct task_struct *child); +extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); @@ -631,7 +631,7 @@ perf_counter_task_sched_out(struct task_struct *task, struct task_struct *next, int cpu) { } static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline void perf_counter_init_task(struct task_struct *child) { } +static inline int perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } -- cgit v1.2.3 From 48e22d56ecdeddd1ffb42a02fccba5c6ef42b133 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 May 2009 17:39:04 +0200 Subject: perf_counter: x86: Remove interrupt throttle remove the x86 specific interrupt throttle Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090525153931.616671838@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d3e85de9bf1e..0c160be2078f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -570,7 +570,6 @@ extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); -extern void perf_counter_unthrottle(void); extern void __perf_disable(void); extern bool __perf_enable(void); extern void perf_disable(void); @@ -635,7 +634,6 @@ static inline int perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } -static inline void perf_counter_unthrottle(void) { } static inline void perf_disable(void) { } static inline void perf_enable(void) { } static inline int perf_counter_task_disable(void) { return -EINVAL; } -- cgit v1.2.3 From a78ac3258782f3e64cb40beb5990808e1febcc0c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 May 2009 17:39:05 +0200 Subject: perf_counter: Generic per counter interrupt throttle Introduce a generic per counter interrupt throttle. This uses the perf_counter_overflow() quick disable to throttle a specific counter when its going too fast when a pmu->unthrottle() method is provided which can undo the quick disable. Power needs to implement both the quick disable and the unthrottle method. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090525153931.703093461@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0c160be2078f..e3a7585d3e43 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -266,6 +266,15 @@ enum perf_event_type { */ PERF_EVENT_PERIOD = 4, + /* + * struct { + * struct perf_event_header header; + * u64 time; + * }; + */ + PERF_EVENT_THROTTLE = 5, + PERF_EVENT_UNTHROTTLE = 6, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* @@ -367,6 +376,7 @@ struct pmu { int (*enable) (struct perf_counter *counter); void (*disable) (struct perf_counter *counter); void (*read) (struct perf_counter *counter); + void (*unthrottle) (struct perf_counter *counter); }; /** @@ -613,6 +623,7 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_priv; extern int sysctl_perf_counter_mlock; +extern int sysctl_perf_counter_limit; extern void perf_counter_init(void); -- cgit v1.2.3 From 0127c3ea082ee9f1034789b978dfc7fd83254617 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 May 2009 22:03:26 +0200 Subject: perf_counter: fix warning & lockup - remove bogus warning - fix wakeup from NMI path lockup - also fix up whitespace noise in perf_counter.h Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090525153931.703093461@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 78 ++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e3a7585d3e43..2b16ed37b74c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -73,7 +73,7 @@ enum sw_event_ids { PERF_SW_EVENTS_MAX = 7, }; -#define __PERF_COUNTER_MASK(name) \ +#define __PERF_COUNTER_MASK(name) \ (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ PERF_COUNTER_##name##_SHIFT) @@ -98,14 +98,14 @@ enum sw_event_ids { * in the overflow packets. */ enum perf_counter_record_format { - PERF_RECORD_IP = 1U << 0, - PERF_RECORD_TID = 1U << 1, - PERF_RECORD_TIME = 1U << 2, - PERF_RECORD_ADDR = 1U << 3, - PERF_RECORD_GROUP = 1U << 4, - PERF_RECORD_CALLCHAIN = 1U << 5, - PERF_RECORD_CONFIG = 1U << 6, - PERF_RECORD_CPU = 1U << 7, + PERF_RECORD_IP = 1U << 0, + PERF_RECORD_TID = 1U << 1, + PERF_RECORD_TIME = 1U << 2, + PERF_RECORD_ADDR = 1U << 3, + PERF_RECORD_GROUP = 1U << 4, + PERF_RECORD_CALLCHAIN = 1U << 5, + PERF_RECORD_CONFIG = 1U << 6, + PERF_RECORD_CPU = 1U << 7, }; /* @@ -235,13 +235,13 @@ enum perf_event_type { * correlate userspace IPs to code. They have the following structure: * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u32 pid, tid; - * u64 addr; - * u64 len; - * u64 pgoff; - * char filename[]; + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; * }; */ PERF_EVENT_MMAP = 1, @@ -249,27 +249,27 @@ enum perf_event_type { /* * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u32 pid, tid; - * char comm[]; + * u32 pid, tid; + * char comm[]; * }; */ PERF_EVENT_COMM = 3, /* * struct { - * struct perf_event_header header; - * u64 time; - * u64 irq_period; + * struct perf_event_header header; + * u64 time; + * u64 irq_period; * }; */ PERF_EVENT_PERIOD = 4, /* * struct { - * struct perf_event_header header; - * u64 time; + * struct perf_event_header header; + * u64 time; * }; */ PERF_EVENT_THROTTLE = 5, @@ -280,23 +280,23 @@ enum perf_event_type { * will be PERF_RECORD_* * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * { u64 ip; } && PERF_RECORD_IP - * { u32 pid, tid; } && PERF_RECORD_TID - * { u64 time; } && PERF_RECORD_TIME - * { u64 addr; } && PERF_RECORD_ADDR - * { u64 config; } && PERF_RECORD_CONFIG - * { u32 cpu, res; } && PERF_RECORD_CPU + * { u64 ip; } && PERF_RECORD_IP + * { u32 pid, tid; } && PERF_RECORD_TID + * { u64 time; } && PERF_RECORD_TIME + * { u64 addr; } && PERF_RECORD_ADDR + * { u64 config; } && PERF_RECORD_CONFIG + * { u32 cpu, res; } && PERF_RECORD_CPU * - * { u64 nr; - * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP + * { u64 nr; + * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP * - * { u16 nr, - * hv, - * kernel, - * user; - * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN + * { u16 nr, + * hv, + * kernel, + * user; + * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN * }; */ }; @@ -406,7 +406,7 @@ struct perf_mmap_data { atomic_t wakeup; /* needs a wakeup */ struct perf_counter_mmap_page *user_page; - void *data_pages[0]; + void *data_pages[0]; }; struct perf_pending_entry { @@ -422,7 +422,7 @@ struct perf_counter { struct list_head list_entry; struct list_head event_entry; struct list_head sibling_list; - int nr_siblings; + int nr_siblings; struct perf_counter *group_leader; const struct pmu *pmu; -- cgit v1.2.3 From b0aae68cc5508f3c2fbf728988c954db4c8b8a53 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 21 May 2009 13:59:18 +0800 Subject: tracing/events: change the type of __str_loc_item to unsigned short When defining a dynamic size string, we add __str_loc_##item to the trace entry, and it stores the location of the actual string in entry->_str_data[] 'unsigned short' should be sufficient to store this information, thus we save 2 bytes per dyn-size string in the ring buffer. [ Impact: reduce memory occupied by dyn-size strings in ring buffer ] Signed-off-by: Li Zefan Cc: Steven Rostedt LKML-Reference: <4A14EDB6.2050507@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index edb02bc9f8ff..b5ff2e8229ec 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -25,7 +25,7 @@ #define __field(type, item) type item; #undef __string -#define __string(item, src) int __str_loc_##item; +#define __string(item, src) unsigned short __str_loc_##item; #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -- cgit v1.2.3 From be74b73a57645cc253d881ab0c1014eb64b9cf22 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 26 May 2009 20:25:22 +0200 Subject: tracing: add __print_flags for events Developers have been asking for the ability in the ftrace event tracer to display names of bits in a flags variable. Instead of printing out c2, it would be easier to read FOO|BAR|GOO, assuming that FOO is bit 1, BAR is bit 6 and GOO is bit 7. Some examples where this would be useful are the state flags in a context switch, kmalloc flags, and even permision flags in accessing files. [ v2 changes include: Frederic Weisbecker's idea of using a mask instead of bits, thus we can output GFP_KERNEL instead of GPF_WAIT|GFP_IO|GFP_FS. Li Zefan's idea of allowing the caller of __print_flags to add their own delimiter (or no delimiter) where we can get for file permissions rwx instead of r|w|x. ] [ v3 changes: Christoph Hellwig's idea of using an array instead of va_args. ] [ Impact: better displaying of flags in trace output ] Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 13 ++++++++++++- include/trace/ftrace.h | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bae51ddfabd3..4b58cf1a11c2 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -3,12 +3,23 @@ #include #include - +#include struct trace_array; struct tracer; struct dentry; +DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq); + +struct trace_print_flags { + unsigned long mask; + const char *name; +}; + +const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, + unsigned long flags, + const struct trace_print_flags *flag_array); + /* * The trace entry - the most basic unit of tracing. This is what * is printed in the end as a single line in the trace output, such as: diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b5ff2e8229ec..22c94719c569 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -87,6 +87,7 @@ * struct trace_seq *s = &iter->seq; * struct ftrace_raw_ *field; <-- defined in stage 1 * struct trace_entry *entry; + * struct trace_seq *p; * int ret; * * entry = iter->ent; @@ -98,7 +99,9 @@ * * field = (typeof(field))entry; * + * p = get_cpu_var(ftrace_event_seq); * ret = trace_seq_printf(s, "\n"); + * put_cpu(); * if (!ret) * return TRACE_TYPE_PARTIAL_LINE; * @@ -119,6 +122,14 @@ #undef __get_str #define __get_str(field) ((char *)__entry + __entry->__str_loc_##field) +#undef __print_flags +#define __print_flags(flag, delim, flag_array...) \ + ({ \ + static const struct trace_print_flags flags[] = \ + { flag_array, { -1, NULL }}; \ + ftrace_print_flags_seq(p, delim, flag, flags); \ + }) + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ enum print_line_t \ @@ -127,6 +138,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ struct trace_seq *s = &iter->seq; \ struct ftrace_raw_##call *field; \ struct trace_entry *entry; \ + struct trace_seq *p; \ int ret; \ \ entry = iter->ent; \ @@ -138,7 +150,9 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ field = (typeof(field))entry; \ \ + p = &get_cpu_var(ftrace_event_seq); \ ret = trace_seq_printf(s, #call ": " print); \ + put_cpu(); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ -- cgit v1.2.3 From 937cdb9db7f59278d0cb1582e6e64e3dfd73b4fc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 15 May 2009 10:51:13 -0400 Subject: tracing: add previous task state info to sched switch event It is useful to see the state of a task that is being switched out. This patch adds the output of the state of the previous task in the context switch event. [ Impact: see state of switched out task in context switch ] Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/trace/events/sched.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index dd4033cf5b09..24ab5bcff7b2 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -156,6 +156,7 @@ TRACE_EVENT(sched_switch, __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) + __field( long, prev_state ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) @@ -165,13 +166,19 @@ TRACE_EVENT(sched_switch, memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; + __entry->prev_state = prev->state; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ), - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->prev_state ? + __print_flags(__entry->prev_state, "|", + { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, + { 16, "Z" }, { 32, "X" }, { 64, "x" }, + { 128, "W" }) : "R", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); -- cgit v1.2.3 From 62ba180e80f4194a498585ac0e4c07daa8ca08d1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 15 May 2009 16:16:30 -0400 Subject: tracing: add flag output for kmem events This patch changes the output for gfp_flags from being a simple hex value to the actual names. gfp_flags=GFP_ATOMIC instead of gfp_flags=00000020 And even gfp_flags=GFP_KERNEL instead of gfp_flags=000000d0 (Thanks to Frederic Weisbecker for pointing out that the first version had a bad order of GFP masks) [ Impact: more human readable output from tracer ] Acked-by: Eduard - Gabriel Munteanu Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/trace/events/kmem.h | 53 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index c22c42f980b5..9baba50d6512 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -7,6 +7,43 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kmem +/* + * The order of these masks is important. Matching masks will be seen + * first and the left over flags will end up showing by themselves. + * + * For example, if we have GFP_KERNEL before GFP_USER we wil get: + * + * GFP_KERNEL|GFP_HARDWALL + * + * Thus most bits set go first. + */ +#define show_gfp_flags(flags) \ + (flags) ? __print_flags(flags, "|", \ + {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \ + {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ + {(unsigned long)GFP_USER, "GFP_USER"}, \ + {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ + {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ + {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ + {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ + {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ + {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \ + {(unsigned long)__GFP_WAIT, "GFP_WAIT"}, \ + {(unsigned long)__GFP_IO, "GFP_IO"}, \ + {(unsigned long)__GFP_COLD, "GFP_COLD"}, \ + {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \ + {(unsigned long)__GFP_REPEAT, "GFP_REPEAT"}, \ + {(unsigned long)__GFP_NOFAIL, "GFP_NOFAIL"}, \ + {(unsigned long)__GFP_NORETRY, "GFP_NORETRY"}, \ + {(unsigned long)__GFP_COMP, "GFP_COMP"}, \ + {(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \ + {(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \ + {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ + {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ + {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ + {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ + ) : "GFP_NOWAIT" + TRACE_EVENT(kmalloc, TP_PROTO(unsigned long call_site, @@ -33,12 +70,12 @@ TRACE_EVENT(kmalloc, __entry->gfp_flags = gfp_flags; ), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", __entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, - __entry->gfp_flags) + show_gfp_flags(__entry->gfp_flags)) ); TRACE_EVENT(kmem_cache_alloc, @@ -67,12 +104,12 @@ TRACE_EVENT(kmem_cache_alloc, __entry->gfp_flags = gfp_flags; ), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", __entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, - __entry->gfp_flags) + show_gfp_flags(__entry->gfp_flags)) ); TRACE_EVENT(kmalloc_node, @@ -104,12 +141,12 @@ TRACE_EVENT(kmalloc_node, __entry->node = node; ), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", __entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, - __entry->gfp_flags, + show_gfp_flags(__entry->gfp_flags), __entry->node) ); @@ -142,12 +179,12 @@ TRACE_EVENT(kmem_cache_alloc_node, __entry->node = node; ), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", __entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, - __entry->gfp_flags, + show_gfp_flags(__entry->gfp_flags), __entry->node) ); -- cgit v1.2.3 From 0f4fc29dd68dfab9c6ddd5d087d34a5b6818cb00 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 May 2009 19:21:47 -0400 Subject: tracing: add __print_symbolic to trace events This patch adds __print_symbolic which is similar to __print_flags but works for an enumeration type instead. That is, there is only a one to one mapping between the values and the symbols. When a match is made, then it is printed, otherwise the hex value is outputed. [ Impact: add interface for showing symbol names in events ] Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 3 +++ include/trace/ftrace.h | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4b58cf1a11c2..bbf40f624fc8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -20,6 +20,9 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); +const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, + const struct trace_print_flags *symbol_array); + /* * The trace entry - the most basic unit of tracing. This is what * is printed in the end as a single line in the trace output, such as: diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 22c94719c569..87fc227c6fbe 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -130,6 +130,14 @@ ftrace_print_flags_seq(p, delim, flag, flags); \ }) +#undef __print_symbolic +#define __print_symbolic(value, symbol_array...) \ + ({ \ + static const struct trace_print_flags symbols[] = \ + { symbol_array, { -1, NULL }}; \ + ftrace_print_symbols_seq(p, value, symbols); \ + }) + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ enum print_line_t \ -- cgit v1.2.3 From c2adae0970ca1db8adb92fb56ae3bcabd916e8bd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 May 2009 19:56:19 -0400 Subject: tracing: convert irq events to use __print_symbolic The recording of the names at trace time is inefficient. This patch implements the softirq event recording to only record the vector and then use the __print_symbolic interface to print out the names. [ Impact: faster recording of softirq events ] Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/trace/events/irq.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 32a9f7ef432b..683fb36a9943 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -7,6 +7,19 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM irq +#define softirq_name(sirq) { sirq, #sirq } +#define show_softirq_name(val) \ + __print_symbolic(val, \ + softirq_name(HI_SOFTIRQ), \ + softirq_name(TIMER_SOFTIRQ), \ + softirq_name(NET_TX_SOFTIRQ), \ + softirq_name(NET_RX_SOFTIRQ), \ + softirq_name(BLOCK_SOFTIRQ), \ + softirq_name(TASKLET_SOFTIRQ), \ + softirq_name(SCHED_SOFTIRQ), \ + softirq_name(HRTIMER_SOFTIRQ), \ + softirq_name(RCU_SOFTIRQ)) + /** * irq_handler_entry - called immediately before the irq action handler * @irq: irq number @@ -87,15 +100,14 @@ TRACE_EVENT(softirq_entry, TP_STRUCT__entry( __field( int, vec ) - __string( name, softirq_to_name[h-vec] ) ), TP_fast_assign( __entry->vec = (int)(h - vec); - __assign_str(name, softirq_to_name[h-vec]); ), - TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) + TP_printk("softirq=%d action=%s", __entry->vec, + show_softirq_name(__entry->vec)) ); /** @@ -117,15 +129,14 @@ TRACE_EVENT(softirq_exit, TP_STRUCT__entry( __field( int, vec ) - __string( name, softirq_to_name[h-vec] ) ), TP_fast_assign( __entry->vec = (int)(h - vec); - __assign_str(name, softirq_to_name[h-vec]); ), - TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) + TP_printk("softirq=%d action=%s", __entry->vec, + show_softirq_name(__entry->vec)) ); #endif /* _TRACE_IRQ_H */ -- cgit v1.2.3 From ab6bf42e2339580b5d87746d0ff4da4b1578b03e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 27 May 2009 14:38:34 -0700 Subject: mlx4_core: Add module parameter for number of MTTs per segment The current MTT allocator uses kmalloc() to allocate a buffer for its buddy allocator, and thus is limited in the amount of MTT segments that it can control. As a result, the size of memory that can be registered is limited too. This patch uses a module parameter to control the number of MTT entries that each segment represents, allowing more memory to be registered with the same number of segments. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 3aff8a6a389e..ce7cc6c7bcbb 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -210,6 +210,7 @@ struct mlx4_caps { int num_comp_vectors; int num_mpts; int num_mtt_segs; + int mtts_per_seg; int fmr_reserved_mtts; int reserved_mtts; int reserved_mrws; -- cgit v1.2.3 From f2aebaee653a35b01c3665de2cbb1e31456b8ea8 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Wed, 27 May 2009 21:36:02 +0800 Subject: ftrace: don't convert function's local variable name in macro "call" is an argument of macro, but it is also used as a local variable name of function in macro. We should keep this local variable name distinct from any CPP macro parameter name if both are in the same macro scope, although it hasn't caused any problem yet. [ Impact: robustify macro ] Signed-off-by: Zhao Lei Acked-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/trace/ftrace.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 87fc227c6fbe..b4ec83ae711f 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -397,19 +397,19 @@ static void ftrace_profile_##call(proto) \ perf_tpcounter_event(event_##call.id); \ } \ \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ { \ int ret = 0; \ \ - if (!atomic_inc_return(&call->profile_count)) \ + if (!atomic_inc_return(&event_call->profile_count)) \ ret = register_trace_##call(ftrace_profile_##call); \ \ return ret; \ } \ \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ { \ - if (atomic_add_negative(-1, &call->profile_count)) \ + if (atomic_add_negative(-1, &event_call->profile_count)) \ unregister_trace_##call(ftrace_profile_##call); \ } @@ -433,9 +433,9 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ #define __array(type, item, len) #undef __string -#define __string(item, src) \ - __str_offsets.item = __str_size + \ - offsetof(typeof(*entry), __str_data); \ +#define __string(item, src) \ + __str_offsets.item = __str_size + \ + offsetof(typeof(*entry), __str_data); \ __str_size += strlen(src) + 1; #undef __assign_str @@ -451,8 +451,8 @@ static struct ftrace_event_call event_##call; \ \ static void ftrace_raw_event_##call(proto) \ { \ - struct ftrace_str_offsets_##call __maybe_unused __str_offsets; \ - struct ftrace_event_call *call = &event_##call; \ + struct ftrace_str_offsets_##call __maybe_unused __str_offsets; \ + struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ unsigned long irq_flags; \ @@ -473,7 +473,7 @@ static void ftrace_raw_event_##call(proto) \ \ assign; \ \ - if (!filter_current_check_discard(call, entry, event)) \ + if (!filter_current_check_discard(event_call, entry, event)) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ } \ \ -- cgit v1.2.3 From d3e78ee3d015dac1794433abb6403b6fc8e70e10 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 May 2009 11:41:50 +0200 Subject: perf_counter: Fix perf_counter_init_task() on !CONFIG_PERF_COUNTERS Pointed out by compiler warnings: tip/include/linux/perf_counter.h:644: warning: no return statement in function returning non-void Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2b16ed37b74c..a65ddc580514 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -641,7 +641,7 @@ perf_counter_task_sched_out(struct task_struct *task, struct task_struct *next, int cpu) { } static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline int perf_counter_init_task(struct task_struct *child) { } +static inline int perf_counter_init_task(struct task_struct *child) { return 0; } static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } -- cgit v1.2.3 From c93f7669098eb97c5376e5396e3dfb734c17df4f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 28 May 2009 22:18:17 +1000 Subject: perf_counter: Fix race in attaching counters to tasks and exiting Commit 564c2b21 ("perf_counter: Optimize context switch between identical inherited contexts") introduced a race where it is possible that a counter being attached to a task could get attached to the wrong task, if the task is one that has inherited its context from another task via fork. This happens because the optimized context switch could switch the context to another task after find_get_context has read task->perf_counter_ctxp. In fact, it's possible that the context could then get freed, if the other task then exits. This fixes the problem by protecting both the context switch and the critical code in find_get_context with spinlocks. The context switch locks the cxt->lock of both the outgoing and incoming contexts before swapping them. That means that once code such as find_get_context has obtained the spinlock for the context associated with a task, the context can't get swapped to another task. However, the context may have been swapped in the interval between reading task->perf_counter_ctxp and getting the lock, so it is necessary to check and retry. To make sure that none of the contexts being looked at in find_get_context can get freed, this changes the context freeing code to use RCU. Thus an rcu_read_lock() is sufficient to ensure that no contexts can get freed. This part of the patch is lifted from a patch posted by Peter Zijlstra. This also adds a check to make sure that we can't add a counter to a task that is exiting. There is also a race between perf_counter_exit_task and find_get_context; this solves the race by moving the get_ctx that was in perf_counter_alloc into the locked region in find_get_context, so that once find_get_context has got the context for a task, it won't get freed even if the task calls perf_counter_exit_task. It doesn't matter if new top-level (non-inherited) counters get attached to the context after perf_counter_exit_task has detached the context from the task. They will just stay there and never get scheduled in until the counters' fds get closed, and then perf_release will remove them from the context and eventually free the context. With this, we are now doing the unclone in find_get_context rather than when a counter was added to or removed from a context (actually, we were missing the unclone_ctx() call when adding a counter to a context). We don't need to unclone when removing a counter from a context because we have no way to remove a counter from a cloned context. This also takes out the smp_wmb() in find_get_context, which Peter Zijlstra pointed out was unnecessary because the cmpxchg implies a full barrier anyway. Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Mike Galbraith Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <18974.33033.667187.273886@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a65ddc580514..717bf3b59ba4 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -541,8 +541,9 @@ struct perf_counter_context { * been cloned (inherited) from a common ancestor. */ struct perf_counter_context *parent_ctx; - u32 parent_gen; - u32 generation; + u64 parent_gen; + u64 generation; + struct rcu_head rcu_head; }; /** -- cgit v1.2.3 From 2f102607ac77354b02a76cf2748598ce9f270f08 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 27 May 2009 23:59:58 -0400 Subject: i7300_idle: allow testing on i5000-series hardware w/o re-compile Testing the i7300_idle driver on i5000-series hardware required an edit to i7300_idle.h to "#define SUPPORT_I5000 1" and a re-build of both i7300_idle and ioat_dma. Replace that build-time scheme with a load-time module parameter: "7300_idle.forceload=1" to make it easier to test the driver on hardware that while not officially validated, works fine and is much more commonly available. By default (no modparam) the driver will continue to load only on the i7300. Note that ioat_dma runs a copy of i7300_idle's probe routine to know to reserve an IOAT channel for i7300_idle. This change makes ioat_dma do that always on the i5000, just like it does on the i7300. Signed-off-by: Len Brown Acked-by: Andrew Henroid --- include/linux/i7300_idle.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/i7300_idle.h b/include/linux/i7300_idle.h index 05a80c44513c..1587b7dec505 100644 --- a/include/linux/i7300_idle.h +++ b/include/linux/i7300_idle.h @@ -16,35 +16,33 @@ struct fbd_ioat { unsigned int vendor; unsigned int ioat_dev; + unsigned int enabled; }; /* * The i5000 chip-set has the same hooks as the i7300 - * but support is disabled by default because this driver - * has not been validated on that platform. + * but it is not enabled by default and must be manually + * manually enabled with "forceload=1" because it is + * only lightly validated. */ -#define SUPPORT_I5000 0 static const struct fbd_ioat fbd_ioat_list[] = { - {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB}, -#if SUPPORT_I5000 - {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT}, -#endif + {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB, 1}, + {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT, 0}, {0, 0} }; /* table of devices that work with this driver */ static const struct pci_device_id pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_FBD_CNB) }, -#if SUPPORT_I5000 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) }, -#endif { } /* Terminating entry */ }; /* Check for known platforms with I/O-AT */ static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev, - struct pci_dev **ioat_dev) + struct pci_dev **ioat_dev, + int enable_all) { int i; struct pci_dev *memdev, *dmadev; @@ -69,6 +67,8 @@ static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev, for (i = 0; fbd_ioat_list[i].vendor != 0; i++) { if (dmadev->vendor == fbd_ioat_list[i].vendor && dmadev->device == fbd_ioat_list[i].ioat_dev) { + if (!(fbd_ioat_list[i].enabled || enable_all)) + continue; if (fbd_dev) *fbd_dev = memdev; if (ioat_dev) -- cgit v1.2.3 From 8bea869c5e56234990e6bad92a543437115bfc18 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 27 Apr 2009 09:44:40 +0200 Subject: ALSA: PCM midlevel: improve fifo_size handling Move the fifo_size assignment to hw->ioctl callback to allow lowlevel drivers overwrite the default behaviour. fifo_size is in frames not bytes as specified in asound.h and alsa-lib's documentation, but most hardware have fixed byte based FIFOs. Introduce internal SNDRV_PCM_INFO_FIFO_IN_FRAMES. Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/asound.h | 1 + include/sound/pcm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/asound.h b/include/sound/asound.h index 6add80fc2512..82aed3f47534 100644 --- a/include/sound/asound.h +++ b/include/sound/asound.h @@ -255,6 +255,7 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_HALF_DUPLEX 0x00100000 /* only half duplex */ #define SNDRV_PCM_INFO_JOINT_DUPLEX 0x00200000 /* playback and capture stream are somewhat correlated */ #define SNDRV_PCM_INFO_SYNC_START 0x00400000 /* pcm support some kind of sync go */ +#define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ typedef int __bitwise snd_pcm_state_t; #define SNDRV_PCM_STATE_OPEN ((__force snd_pcm_state_t) 0) /* stream is open */ diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 267effddb070..8a69b5c1e1c4 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -98,6 +98,7 @@ struct snd_pcm_ops { #define SNDRV_PCM_IOCTL1_INFO 1 #define SNDRV_PCM_IOCTL1_CHANNEL_INFO 2 #define SNDRV_PCM_IOCTL1_GSTATE 3 +#define SNDRV_PCM_IOCTL1_FIFO_SIZE 4 #define SNDRV_PCM_TRIGGER_STOP 0 #define SNDRV_PCM_TRIGGER_START 1 -- cgit v1.2.3 From bbbee90829304d156c12b171c0ac7e6e1aba8b90 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2009 14:25:58 +0200 Subject: perf_counter: Ammend cleanup in fork() fail When fork() fails we cannot use perf_counter_exit_task() since that assumes to operate on current. Write a new helper that cleans up unused/clean contexts. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 717bf3b59ba4..519a41bba249 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -579,6 +579,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task, extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); +extern void perf_counter_free_task(struct task_struct *task); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void __perf_disable(void); @@ -644,6 +645,7 @@ static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline int perf_counter_init_task(struct task_struct *child) { return 0; } static inline void perf_counter_exit_task(struct task_struct *child) { } +static inline void perf_counter_free_task(struct task_struct *task) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } static inline void perf_disable(void) { } -- cgit v1.2.3 From b2e1feaf0af6b8a826b86748a19ddc2013ab7dbd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 28 May 2009 14:34:20 -0700 Subject: cred: #include init.h in cred.h linux/cred.h can't be included as first header (alphabetical order) because it uses __init which is enough to break compilation on some archs. Signed-off-by: Alexey Dobriyan Acked-by: James Morris Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cred.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 3282ee4318e7..4fa999696310 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -13,6 +13,7 @@ #define _LINUX_CRED_H #include +#include #include #include -- cgit v1.2.3 From e767e0561d7fd2333df1921f1ab4176211f9036b Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Thu, 28 May 2009 14:34:28 -0700 Subject: memcg: fix deadlock between lock_page_cgroup and mapping tree_lock mapping->tree_lock can be acquired from interrupt context. Then, following dead lock can occur. Assume "A" as a page. CPU0: lock_page_cgroup(A) interrupted -> take mapping->tree_lock. CPU1: take mapping->tree_lock -> lock_page_cgroup(A) This patch tries to fix above deadlock by moving memcg's hook to out of mapping->tree_lock. charge/uncharge of pagecache/swapcache is protected by page lock, not tree_lock. After this patch, lock_page_cgroup() is not called under mapping->tree_lock. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Daisuke Nishimura Cc: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 62d81435347a..d476aad3ff57 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -437,6 +437,11 @@ static inline int mem_cgroup_cache_charge_swapin(struct page *page, return 0; } +static inline void +mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) +{ +} + #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ -- cgit v1.2.3 From 52bb25a620e1925bb53d41d0ed28571b3de98a31 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 1 Jun 2009 06:21:13 +0000 Subject: headers_check fix: linux/auto_fs.h fix the following 'make headers_check' warnings: usr/include/linux/auto_fs.h:17: include of is preferred over Signed-off-by: Jaswinder Singh Rajput --- include/linux/auto_fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index 63265852b7d1..7b09c8348fd3 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h @@ -14,13 +14,12 @@ #ifndef _LINUX_AUTO_FS_H #define _LINUX_AUTO_FS_H +#include #ifdef __KERNEL__ #include #include -#include #include #else -#include #include #endif /* __KERNEL__ */ -- cgit v1.2.3 From d280cc989ad591607e812cd5c5dfde702b5f191a Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 1 Jun 2009 06:23:25 +0000 Subject: headers_check fix: linux/net_dropmon.h fix the following 'make headers_check' warnings: usr/include/linux/net_dropmon.h:7: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/net_dropmon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h index 0217fb81a630..0e2e100c44a2 100644 --- a/include/linux/net_dropmon.h +++ b/include/linux/net_dropmon.h @@ -1,6 +1,7 @@ #ifndef __NET_DROPMON_H #define __NET_DROPMON_H +#include #include struct net_dm_drop_point { -- cgit v1.2.3 From 25346b93ca079080c9cb23331db5c4f6404e8530 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 1 Jun 2009 17:48:12 +1000 Subject: perf_counter: Provide functions for locking and pinning the context for a task This abstracts out the code for locking the context associated with a task. Because the context might get transferred from one task to another concurrently, we have to check after locking the context that it is still the right context for the task and retry if not. This was open-coded in find_get_context() and perf_counter_init_task(). This adds a further function for pinning the context for a task, i.e. marking it so it can't be transferred to another task. This adds a 'pin_count' field to struct perf_counter_context to indicate that a context is pinned, instead of the previous method of setting the parent_gen count to all 1s. Pinning the context with a pin_count is easier to undo and doesn't require saving the parent_gen value. This also adds a perf_unpin_context() to undo the effect of perf_pin_task_context() and changes perf_counter_init_task to use it. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <18979.34748.755674.596386@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 519a41bba249..81ec79c9f193 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -543,6 +543,7 @@ struct perf_counter_context { struct perf_counter_context *parent_ctx; u64 parent_gen; u64 generation; + int pin_count; struct rcu_head rcu_head; }; -- cgit v1.2.3 From 22a4f650d686eeaac3629dae1c4294381485efdf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 1 Jun 2009 10:13:37 +0200 Subject: perf_counter: Tidy up style details - whitespace fixlets - make local variable definitions more consistent [ Impact: cleanup ] Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 81ec79c9f193..0e57d8cc5a3d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -562,7 +562,7 @@ struct perf_cpu_context { * * task, softirq, irq, nmi context */ - int recursion[4]; + int recursion[4]; }; #ifdef CONFIG_PERF_COUNTERS -- cgit v1.2.3 From fb39125fd79a25c5002f3b45cf4c80e3fa6b961b Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 17 Apr 2009 15:15:51 +0800 Subject: ftrace, workqueuetrace: make workqueue tracepoints use TRACE_EVENT macro v3: zhaolei@cn.fujitsu.com: Change TRACE_EVENT definition to new format introduced by Steven Rostedt: consolidate trace and trace_event headers v2: kosaki@jp.fujitsu.com: print the function names instead of addr, and zap the work addr v1: zhaolei@cn.fujitsu.com: Make workqueue tracepoints use TRACE_EVENT macro TRACE_EVENT is a more generic way to define tracepoints. Doing so adds these new capabilities to the tracepoints: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions Then, this patch converts DEFINE_TRACE to TRACE_EVENT in workqueue related tracepoints. [ Impact: expand workqueue tracer to events tracing ] Signed-off-by: Zhao Lei Cc: Steven Rostedt Cc: Tom Zanussi Cc: Oleg Nesterov Cc: Andrew Morton Signed-off-by: KOSAKI Motohiro Signed-off-by: Frederic Weisbecker --- include/trace/events/workqueue.h | 100 +++++++++++++++++++++++++++++++++++++++ include/trace/workqueue.h | 25 ---------- 2 files changed, 100 insertions(+), 25 deletions(-) create mode 100644 include/trace/events/workqueue.h delete mode 100644 include/trace/workqueue.h (limited to 'include') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h new file mode 100644 index 000000000000..035f1bff288e --- /dev/null +++ b/include/trace/events/workqueue.h @@ -0,0 +1,100 @@ +#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_WORKQUEUE_H + +#include +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM workqueue + +TRACE_EVENT(workqueue_insertion, + + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + + TP_ARGS(wq_thread, work), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(work_func_t, func) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->func = work->func; + ), + + TP_printk("thread=%s:%d func=%pF", __entry->thread_comm, + __entry->thread_pid, __entry->func) +); + +TRACE_EVENT(workqueue_execution, + + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + + TP_ARGS(wq_thread, work), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(work_func_t, func) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->func = work->func; + ), + + TP_printk("thread=%s:%d func=%pF", __entry->thread_comm, + __entry->thread_pid, __entry->func) +); + +/* Trace the creation of one workqueue thread on a cpu */ +TRACE_EVENT(workqueue_creation, + + TP_PROTO(struct task_struct *wq_thread, int cpu), + + TP_ARGS(wq_thread, cpu), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(int, cpu) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->cpu = cpu; + ), + + TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm, + __entry->thread_pid, __entry->cpu) +); + +TRACE_EVENT(workqueue_destruction, + + TP_PROTO(struct task_struct *wq_thread), + + TP_ARGS(wq_thread), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + ), + + TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid) +); + +#endif /* _TRACE_WORKQUEUE_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h deleted file mode 100644 index 7626523deeba..000000000000 --- a/include/trace/workqueue.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __TRACE_WORKQUEUE_H -#define __TRACE_WORKQUEUE_H - -#include -#include -#include - -DECLARE_TRACE(workqueue_insertion, - TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work)); - -DECLARE_TRACE(workqueue_execution, - TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work)); - -/* Trace the creation of one workqueue thread on a cpu */ -DECLARE_TRACE(workqueue_creation, - TP_PROTO(struct task_struct *wq_thread, int cpu), - TP_ARGS(wq_thread, cpu)); - -DECLARE_TRACE(workqueue_destruction, - TP_PROTO(struct task_struct *wq_thread), - TP_ARGS(wq_thread)); - -#endif /* __TRACE_WORKQUEUE_H */ -- cgit v1.2.3 From 6e25db44a7ad7eb380f4ec774ec00a8fcddea112 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 29 May 2009 11:24:59 +0800 Subject: tracing/events: fix a typo in __string() format output "tsize" should be "\tsize". Also remove the space before "__str_loc". Before: # cat tracing/events/irq/irq_handler_entry/format ... field:int irq; offset:12; size:4; field: __str_loc name; offset:16;tsize:2; ... After: # cat tracing/events/irq/irq_handler_entry/format ... field:int irq; offset:12; size:4; field:__str_loc name; offset:16; size:2; ... [ Impact: standardize __string field description in events format file ] Signed-off-by: Li Zefan Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b4ec83ae711f..9276ec4f34de 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -209,8 +209,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) \ - ret = trace_seq_printf(s, "\tfield: __str_loc " #item ";\t" \ - "offset:%u;tsize:%u;\n", \ + ret = trace_seq_printf(s, "\tfield:__str_loc " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), \ __str_loc_##item), \ (unsigned int)sizeof(field.__str_loc_##item)); \ -- cgit v1.2.3 From a9c1c3abe1160a5632e48c929b02b740556bf423 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 1 Jun 2009 15:35:13 +0800 Subject: tracing/events: put TP_fast_assign into braces Currently TP_fast_assign has a limitation that we can't define local variables in it. Here's one use case when we introduce __dynamic_array(): TP_fast_assign( type *p = __get_dynamic_array(item); foo(p); bar(p); ), [ Impact: allow defining local variables in TP_fast_assign ] Signed-off-by: Li Zefan LKML-Reference: <4A2384B1.90100@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 9276ec4f34de..ee9268222448 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -471,7 +471,7 @@ static void ftrace_raw_event_##call(proto) \ return; \ entry = ring_buffer_event_data(event); \ \ - assign; \ + { assign; } \ \ if (!filter_current_check_discard(event_call, entry, event)) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ -- cgit v1.2.3 From 7fcb7c472f455d1711eb5a7633204dba8800a6d6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 1 Jun 2009 15:35:46 +0800 Subject: tracing/events: introduce __dynamic_array() __string() is limited: - it's a char array, but we may want to define array with other types - a source string should be available, but we may just know the string size We introduce __dynamic_array() to break those limitations, and __string() becomes a wrapper of it. As a side effect, now __get_str() can be used in TP_fast_assign but not only TP_print. Take XFS for example, we have the string length in the dirent, but the string itself is not NULL-terminated, so __dynamic_array() can be used: TRACE_EVENT(xfs_dir2, TP_PROTO(struct xfs_da_args *args), TP_ARGS(args), TP_STRUCT__entry( __field(int, namelen) __dynamic_array(char, name, args->namelen + 1) ... ), TP_fast_assign( char *name = __get_str(name); if (args->namelen) memcpy(name, args->name, args->namelen); name[args->namelen] = '\0'; __entry->namelen = args->namelen; ), TP_printk("name %.*s namelen %d", __entry->namelen ? __get_str(name) : NULL __entry->namelen) ); [ Impact: allow defining dynamic size arrays ] Signed-off-by: Li Zefan LKML-Reference: <4A2384D2.3080403@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 122 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 88 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index ee9268222448..b5478dab579b 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,14 +18,17 @@ #include +#undef __field +#define __field(type, item) type item; + #undef __array #define __array(type, item, len) type item[len]; -#undef __field -#define __field(type, item) type item; +#undef __dynamic_array +#define __dynamic_array(type, item, len) unsigned short __data_loc_##item; #undef __string -#define __string(item, src) unsigned short __str_loc_##item; +#define __string(item, src) __dynamic_array(char, item, -1) #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args @@ -35,7 +38,7 @@ struct ftrace_raw_##name { \ struct trace_entry ent; \ tstruct \ - char __str_data[0]; \ + char __data[0]; \ }; \ static struct ftrace_event_call event_##name @@ -47,30 +50,31 @@ * * Include the following: * - * struct ftrace_str_offsets_ { - * int ; - * int ; + * struct ftrace_data_offsets_ { + * int ; + * int ; * [...] * }; * - * The __string() macro will create each int , this is to - * keep the offset of each string from the beggining of the event - * once we perform the strlen() of the src strings. - * + * The __dynamic_array() macro will create each int , this is + * to keep the offset of each array from the beginning of the event. */ +#undef __field +#define __field(type, item); + #undef __array #define __array(type, item, len) -#undef __field -#define __field(type, item); +#undef __dynamic_array +#define __dynamic_array(type, item, len) int item; #undef __string -#define __string(item, src) int item; +#define __string(item, src) __dynamic_array(char, item, -1) #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ - struct ftrace_str_offsets_##call { \ + struct ftrace_data_offsets_##call { \ tstruct; \ }; @@ -119,8 +123,12 @@ #undef TP_printk #define TP_printk(fmt, args...) fmt "\n", args +#undef __get_dynamic_array +#define __get_dynamic_array(field) \ + ((void *)__entry + __entry->__data_loc_##field) + #undef __get_str -#define __get_str(field) ((char *)__entry + __entry->__str_loc_##field) +#define __get_str(field) (char *)__get_dynamic_array(field) #undef __print_flags #define __print_flags(flag, delim, flag_array...) \ @@ -207,16 +215,19 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ if (!ret) \ return 0; -#undef __string -#define __string(item, src) \ - ret = trace_seq_printf(s, "\tfield:__str_loc " #item ";\t" \ +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \ "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), \ - __str_loc_##item), \ - (unsigned int)sizeof(field.__str_loc_##item)); \ + __data_loc_##item), \ + (unsigned int)sizeof(field.__data_loc_##item)); \ if (!ret) \ return 0; +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + #undef __entry #define __entry REC @@ -260,11 +271,14 @@ ftrace_format_##call(struct trace_seq *s) \ if (ret) \ return ret; +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\ + offsetof(typeof(field), __data_loc_##item), \ + sizeof(field.__data_loc_##item), 0); + #undef __string -#define __string(item, src) \ - ret = trace_define_field(event_call, "__str_loc", #item, \ - offsetof(typeof(field), __str_loc_##item), \ - sizeof(field.__str_loc_##item), 0); +#define __string(item, src) __dynamic_array(char, item, -1) #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ @@ -288,6 +302,43 @@ ftrace_define_fields_##call(void) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +/* + * remember the offset of each array from the beginning of the event. + */ + +#undef __entry +#define __entry entry + +#undef __field +#define __field(type, item) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __data_offsets->item = __data_size + \ + offsetof(typeof(*entry), __data); \ + __data_size += (len) * sizeof(type); + +#undef __string +#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \ + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +static inline int ftrace_get_offsets_##call( \ + struct ftrace_data_offsets_##call *__data_offsets, proto) \ +{ \ + int __data_size = 0; \ + struct ftrace_raw_##call __maybe_unused *entry; \ + \ + tstruct; \ + \ + return __data_size; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + /* * Stage 4 of the trace events. * @@ -432,15 +483,15 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ #undef __array #define __array(type, item, len) +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __entry->__data_loc_##item = __data_offsets.item; + #undef __string -#define __string(item, src) \ - __str_offsets.item = __str_size + \ - offsetof(typeof(*entry), __str_data); \ - __str_size += strlen(src) + 1; +#define __string(item, src) __dynamic_array(char, item, -1) \ #undef __assign_str #define __assign_str(dst, src) \ - __entry->__str_loc_##dst = __str_offsets.dst; \ strcpy(__get_str(dst), src); #undef TRACE_EVENT @@ -451,26 +502,29 @@ static struct ftrace_event_call event_##call; \ \ static void ftrace_raw_event_##call(proto) \ { \ - struct ftrace_str_offsets_##call __maybe_unused __str_offsets; \ + struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ unsigned long irq_flags; \ - int __str_size = 0; \ + int __data_size; \ int pc; \ \ local_save_flags(irq_flags); \ pc = preempt_count(); \ \ - tstruct; \ + __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ event = trace_current_buffer_lock_reserve(event_##call.id, \ - sizeof(struct ftrace_raw_##call) + __str_size,\ + sizeof(*entry) + __data_size, \ irq_flags, pc); \ if (!event) \ return; \ entry = ring_buffer_event_data(event); \ \ + \ + tstruct \ + \ { assign; } \ \ if (!filter_current_check_discard(event_call, entry, event)) \ -- cgit v1.2.3 From 1d080d6c3141623c92caaebe20e847cb99ccbb60 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 1 Jun 2009 12:20:40 -0400 Subject: tracing: remove redundant SOFTIRQ from softirq event traces After converting the softirq tracer to use te flags options, this caused a regression with the name. Since the flag was used directly it was printed out (i.e. HRTIMER_SOFTIRQ). This patch only shows the softirq name without the SOFTIRQ part. [ Impact: fix regression of output from softirq events ] Signed-off-by: Steven Rostedt --- include/trace/events/irq.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 683fb36a9943..b0c7ede55eb1 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -7,18 +7,18 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM irq -#define softirq_name(sirq) { sirq, #sirq } -#define show_softirq_name(val) \ - __print_symbolic(val, \ - softirq_name(HI_SOFTIRQ), \ - softirq_name(TIMER_SOFTIRQ), \ - softirq_name(NET_TX_SOFTIRQ), \ - softirq_name(NET_RX_SOFTIRQ), \ - softirq_name(BLOCK_SOFTIRQ), \ - softirq_name(TASKLET_SOFTIRQ), \ - softirq_name(SCHED_SOFTIRQ), \ - softirq_name(HRTIMER_SOFTIRQ), \ - softirq_name(RCU_SOFTIRQ)) +#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } +#define show_softirq_name(val) \ + __print_symbolic(val, \ + softirq_name(HI), \ + softirq_name(TIMER), \ + softirq_name(NET_TX), \ + softirq_name(NET_RX), \ + softirq_name(BLOCK), \ + softirq_name(TASKLET), \ + softirq_name(SCHED), \ + softirq_name(HRTIMER), \ + softirq_name(RCU)) /** * irq_handler_entry - called immediately before the irq action handler -- cgit v1.2.3 From 112f38a7e36e9d688b389507136bf3af3e6d159b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 1 Jun 2009 15:16:05 -0400 Subject: tracing: make trace pipe recognize latency format flag The trace_pipe did not recognize the latency format flag and would produce different output than the trace file. The problem was partly due that the trace flags in the iterator was not set as well as the trace_pipe zeros out part of the iterator (including the flags) to be able to use the same routines as the trace file. trace_flags of the iterator should not cause any problems when not zeroed out by for trace_pipe. Reported-by: Johannes Berg Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bbf40f624fc8..5c093ffc655b 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -51,6 +51,7 @@ struct trace_iterator { int cpu_file; struct mutex mutex; struct ring_buffer_iter *buffer_iter[NR_CPUS]; + unsigned long iter_flags; /* The below is zeroed out in pipe_read */ struct trace_seq seq; @@ -58,7 +59,6 @@ struct trace_iterator { int cpu; u64 ts; - unsigned long iter_flags; loff_t pos; long idx; -- cgit v1.2.3 From 10a8ebbb08c4b08292598947bbe534e04d6ee705 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 2 Jun 2009 12:02:38 +0200 Subject: ALSA: Core - add snd_card_set_id() function Introduce snd_card_set_id() function to allow lowlevel drivers to set default identification name for card slot. The function checks also for identification name collisions and tries to create unique name. Also, the snd_card_create() function is simplified, because this new function is used. As bonus, proper name collision checks are evaluated at the card create time. Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 3dea79829acc..0e8ae10155ab 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -313,6 +313,7 @@ struct snd_card *snd_card_new(int idx, const char *id, int snd_card_disconnect(struct snd_card *card); int snd_card_free(struct snd_card *card); int snd_card_free_when_closed(struct snd_card *card); +void snd_card_set_id(struct snd_card *card, const char *id); int snd_card_register(struct snd_card *card); int snd_card_info_init(void); int snd_card_info_done(void); -- cgit v1.2.3 From 3f731ca60afc29f5bcdb5fd2a04391466313a9ac Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 1 Jun 2009 17:52:30 +1000 Subject: perf_counter: Fix cpu migration counter This fixes the cpu migration software counter to count correctly even when contexts get swapped from one task to another. Previously the cpu migration counts reported by perf stat were bogus, ranging from negative to several thousand for a single "lat_ctx 2 8 32" run. With this patch the cpu migration count reported for "lat_ctx 2 8 32" is almost always between 35 and 44. This fixes the problem by adding a call into the perf_counter code from set_task_cpu when tasks are migrated. This enables us to use the generic swcounter code (with some modifications) for the cpu migration counter. This modifies the swcounter code to allow a NULL regs pointer to be passed in to perf_swcounter_ctx_event() etc. The cpu migration counter does this because there isn't necessarily a pt_regs struct for the task available. In this case, the counter will not have interrupt capability - but the migration counter didn't have interrupt capability before, so this is no loss. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <18979.35006.819769.416327@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0e57d8cc5a3d..deb9acf9ad2a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -615,6 +615,8 @@ extern void perf_counter_munmap(unsigned long addr, unsigned long len, extern void perf_counter_comm(struct task_struct *tsk); +extern void perf_counter_task_migration(struct task_struct *task, int cpu); + #define MAX_STACK_DEPTH 255 struct perf_callchain_entry { @@ -668,6 +670,8 @@ perf_counter_munmap(unsigned long addr, unsigned long len, static inline void perf_counter_comm(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } +static inline void perf_counter_task_migration(struct task_struct *task, + int cpu) { } #endif #endif /* __KERNEL__ */ -- cgit v1.2.3 From bf4e0ed3d027ce581be18496036862131b5f32aa Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 1 Jun 2009 17:53:16 +1000 Subject: perf_counter: Remove unused prev_state field This removes the prev_state field of struct perf_counter since it is now unused. It was only used by the cpu migration counter, which doesn't use it any more. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <18979.35052.915728.626374@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index deb9acf9ad2a..d970fbc16afd 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -427,7 +427,6 @@ struct perf_counter { const struct pmu *pmu; enum perf_counter_active_state state; - enum perf_counter_active_state prev_state; atomic64_t count; /* -- cgit v1.2.3 From 709e50cf870e61745b39552044aa6c7c38e4f9e0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 14:13:15 +0200 Subject: perf_counter: Use PID namespaces properly Stop using task_struct::pid and start using PID namespaces. PIDs will be reported in the PID namespace of the monitoring task at the moment of counter creation. Signed-off-by: Peter Zijlstra Cc: Eric W. Biederman Cc: Oleg Nesterov Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d970fbc16afd..9ec20fc6bd36 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -317,6 +317,7 @@ enum perf_event_type { #include #include #include +#include #include struct task_struct; @@ -500,6 +501,8 @@ struct perf_counter { void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; + + struct pid_namespace *ns; #endif }; -- cgit v1.2.3 From 05ad709d04799125ed85dd816fdb558258102172 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 2 Jun 2009 16:58:10 +0100 Subject: parport: quickfix the proc registration bug Ideally we should have a directory of drivers and a link to the 'active' driver. For now just show the first device which is effectively the existing semantics without a warning. This is an update on the original buggy patch that I then forgot to resubmit. Confusingly it was proposed by Red Hat, written by Etched Pixels fixed and submitted by Intel ... Resolves-Bug: http://bugzilla.kernel.org/show_bug.cgi?id=9749 Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/parport.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/parport.h b/include/linux/parport.h index e1f83c5065c5..38a423ed3c01 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -324,6 +324,10 @@ struct parport { int spintime; atomic_t ref_count; + unsigned long devflags; +#define PARPORT_DEVPROC_REGISTERED 0 + struct pardevice *proc_device; /* Currently register proc device */ + struct list_head full_list; struct parport *slaves[3]; }; -- cgit v1.2.3 From 53e111a730ea8b002d57dd226098c12789993329 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 17:01:58 +0200 Subject: x86: Fix atomic_long_xchg() on 64bit Apparently I'm the first to use it :-) Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/asm-generic/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 3673a13b6703..81d3be459efb 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -134,7 +134,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) #define atomic_long_cmpxchg(l, old, new) \ (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) #define atomic_long_xchg(v, new) \ - (atomic64_xchg((atomic64_t *)(l), (new))) + (atomic64_xchg((atomic64_t *)(v), (new))) #else /* BITS_PER_LONG == 64 */ -- cgit v1.2.3 From 8e5799b1ad2a0567fdfaaf0e91b40efee010f2c1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 15:08:15 +0200 Subject: perf_counter: Add unique counter id Stephan raised the issue that we currently cannot distinguish between similar counters within a group (PERF_RECORD_GROUP uses the config value as identifier). Therefore, generate a new ID for each counter using a global u64 sequence counter. Reported-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 9ec20fc6bd36..4845a214b9e7 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -114,8 +114,9 @@ enum perf_counter_record_format { * in increasing order of bit value, after the counter value. */ enum perf_counter_read_format { - PERF_FORMAT_TOTAL_TIME_ENABLED = 1, - PERF_FORMAT_TOTAL_TIME_RUNNING = 2, + PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, + PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, + PERF_FORMAT_ID = 1U << 2, }; /* @@ -290,7 +291,7 @@ enum perf_event_type { * { u32 cpu, res; } && PERF_RECORD_CPU * * { u64 nr; - * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP + * { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP * * { u16 nr, * hv, @@ -503,6 +504,7 @@ struct perf_counter { struct rcu_head rcu_head; struct pid_namespace *ns; + u64 id; #endif }; -- cgit v1.2.3 From b23f3325ed465f1bd914384884269af0d106778c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 15:13:03 +0200 Subject: perf_counter: Rename various fields A few renames: s/irq_period/sample_period/ s/irq_freq/sample_freq/ s/PERF_RECORD_/PERF_SAMPLE_/ s/record_type/sample_type/ And change both the new sample_type and read_format to u64. Reported-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4845a214b9e7..1fcd3cc93855 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -94,18 +94,18 @@ enum sw_event_ids { #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) /* - * Bits that can be set in hw_event.record_type to request information + * Bits that can be set in hw_event.sample_type to request information * in the overflow packets. */ -enum perf_counter_record_format { - PERF_RECORD_IP = 1U << 0, - PERF_RECORD_TID = 1U << 1, - PERF_RECORD_TIME = 1U << 2, - PERF_RECORD_ADDR = 1U << 3, - PERF_RECORD_GROUP = 1U << 4, - PERF_RECORD_CALLCHAIN = 1U << 5, - PERF_RECORD_CONFIG = 1U << 6, - PERF_RECORD_CPU = 1U << 7, +enum perf_counter_sample_format { + PERF_SAMPLE_IP = 1U << 0, + PERF_SAMPLE_TID = 1U << 1, + PERF_SAMPLE_TIME = 1U << 2, + PERF_SAMPLE_ADDR = 1U << 3, + PERF_SAMPLE_GROUP = 1U << 4, + PERF_SAMPLE_CALLCHAIN = 1U << 5, + PERF_SAMPLE_CONFIG = 1U << 6, + PERF_SAMPLE_CPU = 1U << 7, }; /* @@ -132,12 +132,12 @@ struct perf_counter_hw_event { __u64 config; union { - __u64 irq_period; - __u64 irq_freq; + __u64 sample_period; + __u64 sample_freq; }; - __u32 record_type; - __u32 read_format; + __u64 sample_type; + __u64 read_format; __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ @@ -262,7 +262,7 @@ enum perf_event_type { * struct { * struct perf_event_header header; * u64 time; - * u64 irq_period; + * u64 sample_period; * }; */ PERF_EVENT_PERIOD = 4, @@ -363,7 +363,7 @@ struct hw_perf_counter { }; }; atomic64_t prev_count; - u64 irq_period; + u64 sample_period; atomic64_t period_left; u64 interrupts; #endif -- cgit v1.2.3 From 8a016db386195b193e2a8aeddff9fe937dcb7a40 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 15:27:45 +0200 Subject: perf_counter: Remove the last nmi/irq bits IRQ (non-NMI) sampling is not used anymore - remove the last few bits. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1fcd3cc93855..cef9931793fd 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -140,7 +140,6 @@ struct perf_counter_hw_event { __u64 read_format; __u64 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ inherit : 1, /* children inherit it */ pinned : 1, /* must always be on PMU */ exclusive : 1, /* only group on PMU */ @@ -153,7 +152,7 @@ struct perf_counter_hw_event { comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ - __reserved_1 : 51; + __reserved_1 : 52; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; @@ -354,7 +353,6 @@ struct hw_perf_counter { u64 config; unsigned long config_base; unsigned long counter_base; - int nmi; int idx; }; union { /* software */ -- cgit v1.2.3 From 8e3747c13c39246c7e46def7cf495d9d21d4c5f9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 16:16:02 +0200 Subject: perf_counter: Change data head from u32 to u64 Since some people worried that 4G might not be a large enough as an mmap data window, extend it to 64 bit for capable platforms. Reported-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index cef9931793fd..c046f7d97cfa 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -212,7 +212,7 @@ struct perf_counter_mmap_page { * User-space reading this value should issue an rmb(), on SMP capable * platforms, after reading this value -- see perf_counter_wakeup(). */ - __u32 data_head; /* head in the data section */ + __u64 data_head; /* head in the data section */ }; #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) @@ -397,10 +397,11 @@ struct perf_mmap_data { int nr_locked; /* nr pages mlocked */ atomic_t poll; /* POLL_ for wakeups */ - atomic_t head; /* write position */ atomic_t events; /* event limit */ - atomic_t done_head; /* completed head */ + atomic_long_t head; /* write position */ + atomic_long_t done_head; /* completed head */ + atomic_t lock; /* concurrent writes */ atomic_t wakeup; /* needs a wakeup */ -- cgit v1.2.3 From 08247e31ca79b8f02cce47b7e8120797a8726606 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 16:46:57 +0200 Subject: perf_counter: Add ioctl for changing the sample period/frequency Reported-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c046f7d97cfa..45bdd3b95d3e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -164,10 +164,11 @@ struct perf_counter_hw_event { /* * Ioctls that can be done on a perf counter fd: */ -#define PERF_COUNTER_IOC_ENABLE _IOW('$', 0, u32) -#define PERF_COUNTER_IOC_DISABLE _IOW('$', 1, u32) -#define PERF_COUNTER_IOC_REFRESH _IOW('$', 2, u32) -#define PERF_COUNTER_IOC_RESET _IOW('$', 3, u32) +#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) +#define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) +#define PERF_COUNTER_IOC_RESET _IO ('$', 3) +#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) enum perf_counter_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, -- cgit v1.2.3 From 0d48696f87e3618b0d35bd3e4e9d7c188d51e7de Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 19:22:16 +0200 Subject: perf_counter: Rename perf_counter_hw_event => perf_counter_attr The structure isn't hw only and when I read event, I think about those things that fall out the other end. Rename the thing. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur Cc: Stephane Eranian LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 34 +++++++++++++++++----------------- include/linux/syscalls.h | 4 ++-- 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 45bdd3b95d3e..37d5541d74cb 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -22,7 +22,7 @@ */ /* - * hw_event.type + * attr.type */ enum perf_event_types { PERF_TYPE_HARDWARE = 0, @@ -37,10 +37,10 @@ enum perf_event_types { }; /* - * Generalized performance counter event types, used by the hw_event.event_id + * Generalized performance counter event types, used by the attr.event_id * parameter of the sys_perf_counter_open() syscall: */ -enum hw_event_ids { +enum attr_ids { /* * Common hardware events, generalized by the kernel: */ @@ -94,7 +94,7 @@ enum sw_event_ids { #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) /* - * Bits that can be set in hw_event.sample_type to request information + * Bits that can be set in attr.sample_type to request information * in the overflow packets. */ enum perf_counter_sample_format { @@ -109,7 +109,7 @@ enum perf_counter_sample_format { }; /* - * Bits that can be set in hw_event.read_format to request that + * Bits that can be set in attr.read_format to request that * reads on the counter should return the indicated quantities, * in increasing order of bit value, after the counter value. */ @@ -122,7 +122,7 @@ enum perf_counter_read_format { /* * Hardware event to monitor via a performance monitoring counter: */ -struct perf_counter_hw_event { +struct perf_counter_attr { /* * The MSB of the config word signifies if the rest contains cpu * specific (raw) counter configuration data, if unset, the next @@ -323,25 +323,25 @@ enum perf_event_type { struct task_struct; -static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event) +static inline u64 perf_event_raw(struct perf_counter_attr *attr) { - return hw_event->config & PERF_COUNTER_RAW_MASK; + return attr->config & PERF_COUNTER_RAW_MASK; } -static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event) +static inline u64 perf_event_config(struct perf_counter_attr *attr) { - return hw_event->config & PERF_COUNTER_CONFIG_MASK; + return attr->config & PERF_COUNTER_CONFIG_MASK; } -static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event) +static inline u64 perf_event_type(struct perf_counter_attr *attr) { - return (hw_event->config & PERF_COUNTER_TYPE_MASK) >> + return (attr->config & PERF_COUNTER_TYPE_MASK) >> PERF_COUNTER_TYPE_SHIFT; } -static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event) +static inline u64 perf_event_id(struct perf_counter_attr *attr) { - return hw_event->config & PERF_COUNTER_EVENT_MASK; + return attr->config & PERF_COUNTER_EVENT_MASK; } /** @@ -457,7 +457,7 @@ struct perf_counter { u64 tstamp_running; u64 tstamp_stopped; - struct perf_counter_hw_event hw_event; + struct perf_counter_attr attr; struct hw_perf_counter hw; struct perf_counter_context *ctx; @@ -605,8 +605,8 @@ extern int perf_counter_overflow(struct perf_counter *counter, */ static inline int is_software_counter(struct perf_counter *counter) { - return !perf_event_raw(&counter->hw_event) && - perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; + return !perf_event_raw(&counter->attr) && + perf_event_type(&counter->attr) != PERF_TYPE_HARDWARE; } extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 79faae950e2e..c6c84ad8bd71 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -55,7 +55,7 @@ struct compat_timeval; struct robust_list_head; struct getcpu_cache; struct old_linux_dirent; -struct perf_counter_hw_event; +struct perf_counter_attr; #include #include @@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage long sys_perf_counter_open( - const struct perf_counter_hw_event __user *hw_event_uptr, + const struct perf_counter_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); #endif -- cgit v1.2.3 From a05c0205ba031c01bba33a21bf0a35920eb64833 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 3 Jun 2009 09:33:18 +0200 Subject: block: Fix bounce limit setting in DM blk_queue_bounce_limit() is more than a wrapper about the request queue limits.bounce_pfn variable. Introduce blk_queue_bounce_pfn() which can be called by stacking drivers that wish to set the bounce limit explicitly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5e740a135e73..989aa1790f48 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -910,6 +910,7 @@ extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); +extern void blk_queue_bounce_pfn(struct request_queue *, u64); extern void blk_queue_max_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); -- cgit v1.2.3 From 56d8bd3f0b98972312cad683947ec90b21011199 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 3 Jun 2009 14:52:03 +0100 Subject: tracing: fix multiple use of __print_flags and __print_symbolic Here is an updated patch to include the extra call to trace_seq_init() as requested. This is vs. the latest -tip tree and fixes the use of multiple __print_flags and __print_symbolic in a single tracer. Also tested to ensure its working now: mount.gfs2-2534 [000] 235.850587: gfs2_glock_queue: 8.7 glock 1:2 dequeue PR mount.gfs2-2534 [000] 235.850591: gfs2_demote_rq: 8.7 glock 1:0 demote EX to NL flags:DI mount.gfs2-2534 [000] 235.850591: gfs2_glock_queue: 8.7 glock 1:0 dequeue EX glock_workqueue-2529 [000] 235.850666: gfs2_glock_state_change: 8.7 glock 1:0 state EX => NL tgt:NL dmt:NL flags:lDpI glock_workqueue-2529 [000] 235.850672: gfs2_glock_put: 8.7 glock 1:0 state NL => IV flags:I Signed-off-by: Steven Whitehouse LKML-Reference: <1244037123.29604.603.camel@localhost.localdomain> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b5478dab579b..40ede4db4d88 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -104,6 +104,7 @@ * field = (typeof(field))entry; * * p = get_cpu_var(ftrace_event_seq); + * trace_seq_init(p); * ret = trace_seq_printf(s, "\n"); * put_cpu(); * if (!ret) @@ -167,6 +168,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ field = (typeof(field))entry; \ \ p = &get_cpu_var(ftrace_event_seq); \ + trace_seq_init(p); \ ret = trace_seq_printf(s, #call ": " print); \ put_cpu(); \ if (!ret) \ -- cgit v1.2.3 From c9fb15f60eb517c958dec64dca9357bf62bf2201 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 30 May 2009 20:42:28 -0700 Subject: drm: Hook up DPMS property handling in drm_crtc.c. Add drm_helper_connector_dpms. Making the drm_crtc.c code recognize the DPMS property and invoke the connector->dpms function doesn't remove any capability from the driver while reducing code duplication. That just highlighted the problem with the existing DPMS functions which could turn off the connector, but failed to turn off any relevant crtcs. The new drm_helper_connector_dpms function manages all of that, using the drm_helper-specific crtc and encoder dpms functions, automatically computing the appropriate DPMS level for each object in the system. This fixes the current troubles in the i915 driver which left PLLs, pipes and planes running while in DPMS_OFF mode or even while they were unused. Signed-off-by: Keith Packard Signed-off-by: Dave Airlie --- include/drm/drm_crtc.h | 3 +++ include/drm/drm_crtc_helper.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 3c1924c010e8..7300fb866767 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -471,6 +471,9 @@ struct drm_connector { u32 property_ids[DRM_CONNECTOR_MAX_PROPERTY]; uint64_t property_values[DRM_CONNECTOR_MAX_PROPERTY]; + /* requested DPMS state */ + int dpms; + void *helper_private; uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER]; diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index ec073d8288d9..6769ff6c1bc0 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -99,6 +99,8 @@ extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, struct drm_framebuffer *old_fb); extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc); +extern void drm_helper_connector_dpms(struct drm_connector *connector, int mode); + extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb, struct drm_mode_fb_cmd *mode_cmd); -- cgit v1.2.3 From e0a94c2a63f2644826069044649669b5e7ca75d3 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 3 Jun 2009 16:04:31 -0400 Subject: security: use mmap_min_addr indepedently of security models This patch removes the dependency of mmap_min_addr on CONFIG_SECURITY. It also sets a default mmap_min_addr of 4096. mmapping of addresses below 4096 will only be possible for processes with CAP_SYS_RAWIO. Signed-off-by: Christoph Lameter Acked-by: Eric Paris Looks-ok-by: Linus Torvalds Signed-off-by: James Morris --- include/linux/mm.h | 2 -- include/linux/security.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index bff1f0d475c7..0c21af6abffb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -580,12 +580,10 @@ static inline void set_page_links(struct page *page, enum zone_type zone, */ static inline unsigned long round_hint_to_min(unsigned long hint) { -#ifdef CONFIG_SECURITY hint &= PAGE_MASK; if (((void *)hint != NULL) && (hint < mmap_min_addr)) return PAGE_ALIGN(mmap_min_addr); -#endif return hint; } diff --git a/include/linux/security.h b/include/linux/security.h index d5fd6163606f..5eff459b3833 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2197,6 +2197,8 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { + if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) + return -EACCES; return 0; } -- cgit v1.2.3 From 60313ebed739b331e8e61079da27a11ee3b73a30 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Jun 2009 16:53:44 +0200 Subject: perf_counter: Add fork event Create a fork event so that we can easily clone the comm and dso maps without having to generate all those events. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 37d5541d74cb..380247bdb918 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -276,6 +276,14 @@ enum perf_event_type { PERF_EVENT_THROTTLE = 5, PERF_EVENT_UNTHROTTLE = 6, + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * }; + */ + PERF_EVENT_FORK = 7, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* @@ -618,6 +626,7 @@ extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); extern void perf_counter_comm(struct task_struct *tsk); +extern void perf_counter_fork(struct task_struct *tsk); extern void perf_counter_task_migration(struct task_struct *task, int cpu); @@ -673,6 +682,7 @@ perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file) { } static inline void perf_counter_comm(struct task_struct *tsk) { } +static inline void perf_counter_fork(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } static inline void perf_counter_task_migration(struct task_struct *task, int cpu) { } -- cgit v1.2.3 From d99e9446200c1ffab28cb0e39b76c34a2bfafd06 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Jun 2009 17:08:58 +0200 Subject: perf_counter: Remove munmap stuff In name of keeping it simple, only track mmap events. Userspace will have to remove old overlapping maps when it encounters them. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 380247bdb918..6ca403acd419 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -148,11 +148,10 @@ struct perf_counter_attr { exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ mmap : 1, /* include mmap data */ - munmap : 1, /* include munmap data */ comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ - __reserved_1 : 52; + __reserved_1 : 53; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; @@ -246,7 +245,6 @@ enum perf_event_type { * }; */ PERF_EVENT_MMAP = 1, - PERF_EVENT_MUNMAP = 2, /* * struct { @@ -622,9 +620,6 @@ extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); extern void perf_counter_mmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); -extern void perf_counter_munmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file); - extern void perf_counter_comm(struct task_struct *tsk); extern void perf_counter_fork(struct task_struct *tsk); @@ -677,10 +672,6 @@ static inline void perf_counter_mmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file) { } -static inline void -perf_counter_munmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file) { } - static inline void perf_counter_comm(struct task_struct *tsk) { } static inline void perf_counter_fork(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } -- cgit v1.2.3 From 5926a295bb78272b3f648f62febecd19a1b6a6ca Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Thu, 4 Jun 2009 17:43:04 +0100 Subject: [ARM] 5541/1: serial/amba-pl011.c: add support for the modified port found in Nomadik The Nomadik 8815 SoC has a slightly modified version of the PL011 block. The patch uses the different ID value as a key to select a vendor structure that is used to keep track of the differences, as suggested by Russell King. Signed-off-by: Alessandro Rubini Acked-by: Andrea Gallo Acked-by: Linus Walleij Signed-off-by: Russell King --- include/linux/amba/serial.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 48ee32a18ac5..42949e210cb8 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -114,6 +114,9 @@ #define UART011_IFLS_TX4_8 (2 << 0) #define UART011_IFLS_TX6_8 (3 << 0) #define UART011_IFLS_TX7_8 (4 << 0) +/* special values for ST vendor with deeper fifo */ +#define UART011_IFLS_RX_HALF (5 << 3) +#define UART011_IFLS_TX_HALF (5 << 0) #define UART011_OEIM (1 << 10) /* overrun error interrupt mask */ #define UART011_BEIM (1 << 9) /* break error interrupt mask */ -- cgit v1.2.3 From c0683039207226afcffbe0fbf6a1caaee77a37b0 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 3 Jun 2009 17:43:14 +0100 Subject: [ARM] 5536/1: Move clk_add_alias() to arch/arm/common/clkdev.c This can be used for other arm platforms too as discussed on the linux-arm-kernel list. Also check the return value with IS_ERR and return PTR_ERR as suggested by Russell King. Signed-off-by: Tony Lindgren Signed-off-by: Russell King --- include/linux/clk.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/clk.h b/include/linux/clk.h index 1db9bbf444a3..1d37f42ac294 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -142,4 +142,17 @@ struct clk *clk_get_parent(struct clk *clk); */ struct clk *clk_get_sys(const char *dev_id, const char *con_id); +/** + * clk_add_alias - add a new clock alias + * @alias: name for clock alias + * @alias_dev_name: device name + * @id: platform specific clock name + * @dev: device + * + * Allows using generic clock names for drivers by adding a new alias. + * Assumes clkdev, see clkdev.h for more info. + */ +int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, + struct device *dev); + #endif -- cgit v1.2.3 From 087eb437051b3de817720f9c80c440fc9e7dcce8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 4 Jun 2009 16:29:07 -0700 Subject: ptrace: tracehook_report_clone: fix false positives The "trace || CLONE_PTRACE" check in tracehook_report_clone() is not right, - If the untraced task does clone(CLONE_PTRACE) the new child is not traced, we must not queue SIGSTOP. - If we forked the traced task, but the tracer exits and untraces both the forking task and the new child (after copy_process() drops tasklist_lock), we should not queue SIGSTOP too. Change the code to check task_ptrace() != 0 instead. This is still racy, but the race is harmless. We can race with another tracer attaching to this child, or the tracer can exit and detach in parallel. But giwen that we didn't do wake_up_new_task() yet, the child must have the pending SIGSTOP anyway. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Christoph Hellwig Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index c7aa154f4bfc..eb96603d92db 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -259,14 +259,12 @@ static inline void tracehook_finish_clone(struct task_struct *child, /** * tracehook_report_clone - in parent, new child is about to start running - * @trace: return value from tracehook_prepare_clone() * @regs: parent's user register state * @clone_flags: flags from parent's system call * @pid: new child's PID in the parent's namespace * @child: new child task * - * Called after a child is set up, but before it has been started - * running. @trace is the value returned by tracehook_prepare_clone(). + * Called after a child is set up, but before it has been started running. * This is not a good place to block, because the child has not started * yet. Suspend the child here if desired, and then block in * tracehook_report_clone_complete(). This must prevent the child from @@ -276,13 +274,14 @@ static inline void tracehook_finish_clone(struct task_struct *child, * * Called with no locks held, but the child cannot run until this returns. */ -static inline void tracehook_report_clone(int trace, struct pt_regs *regs, +static inline void tracehook_report_clone(struct pt_regs *regs, unsigned long clone_flags, pid_t pid, struct task_struct *child) { - if (unlikely(trace) || unlikely(clone_flags & CLONE_PTRACE)) { + if (unlikely(task_ptrace(child))) { /* - * The child starts up with an immediate SIGSTOP. + * It doesn't matter who attached/attaching to this + * task, the pending SIGSTOP is right in any case. */ sigaddset(&child->pending.signal, SIGSTOP); set_tsk_thread_flag(child, TIF_SIGPENDING); -- cgit v1.2.3 From 089dd79db9264dc0da602bad45d42f1b3e7d1e07 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Jun 2009 14:04:55 +0200 Subject: perf_counter: Generate mmap events for install_special_mapping() In order to track the vdso also generate mmap events for install_special_mapping(). Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6ca403acd419..40dc0e273d9c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -617,8 +617,13 @@ static inline int is_software_counter(struct perf_counter *counter) extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); -extern void perf_counter_mmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file); +extern void __perf_counter_mmap(struct vm_area_struct *vma); + +static inline void perf_counter_mmap(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_EXEC) + __perf_counter_mmap(vma); +} extern void perf_counter_comm(struct task_struct *tsk); extern void perf_counter_fork(struct task_struct *tsk); @@ -668,10 +673,7 @@ static inline void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } -static inline void -perf_counter_mmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file) { } - +static inline void perf_counter_mmap(struct vm_area_struct *vma) { } static inline void perf_counter_comm(struct task_struct *tsk) { } static inline void perf_counter_fork(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } -- cgit v1.2.3 From 1b8e69662e1a086878bf930a6042daf7f8a076cc Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 5 Jun 2009 14:37:23 +0000 Subject: pnp: add PNP resource range checking function Add a PNP resource range check function, indicating whether a resource has been assigned to any device. Signed-off-by: Bjorn Helgaas [apw@canonical.com: fixed up exports et al] Signed-off-by: Andy Whitcroft Signed-off-by: Eric Anholt --- include/linux/pnp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index ca3c88773028..b063c7328ba5 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -446,6 +446,7 @@ int pnp_start_dev(struct pnp_dev *dev); int pnp_stop_dev(struct pnp_dev *dev); int pnp_activate_dev(struct pnp_dev *dev); int pnp_disable_dev(struct pnp_dev *dev); +int pnp_range_reserved(resource_size_t start, resource_size_t end); /* protocol helpers */ int pnp_is_active(struct pnp_dev *dev); @@ -476,6 +477,7 @@ static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; } +static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0;} /* protocol helpers */ static inline int pnp_is_active(struct pnp_dev *dev) { return 0; } -- cgit v1.2.3 From 3f7440a6b771169e1f11fa582e53a4259b682809 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 5 Jun 2009 17:40:04 +0200 Subject: ALSA: Clean up 64bit division functions Replace the house-made div64_32() with the standard div_u64*() functions. Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 74 ----------------------------------------------------- 1 file changed, 74 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index c17296891617..0caf71e16944 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -486,80 +486,6 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream); void snd_pcm_vma_notify_data(void *client, void *data); int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area); -#if BITS_PER_LONG >= 64 - -static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem) -{ - *rem = *n % div; - *n /= div; -} - -#elif defined(i386) - -static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem) -{ - u_int32_t low, high; - low = *n & 0xffffffff; - high = *n >> 32; - if (high) { - u_int32_t high1 = high % div; - high /= div; - asm("divl %2":"=a" (low), "=d" (*rem):"rm" (div), "a" (low), "d" (high1)); - *n = (u_int64_t)high << 32 | low; - } else { - *n = low / div; - *rem = low % div; - } -} -#else - -static inline void divl(u_int32_t high, u_int32_t low, - u_int32_t div, - u_int32_t *q, u_int32_t *r) -{ - u_int64_t n = (u_int64_t)high << 32 | low; - u_int64_t d = (u_int64_t)div << 31; - u_int32_t q1 = 0; - int c = 32; - while (n > 0xffffffffU) { - q1 <<= 1; - if (n >= d) { - n -= d; - q1 |= 1; - } - d >>= 1; - c--; - } - q1 <<= c; - if (n) { - low = n; - *q = q1 | (low / div); - *r = low % div; - } else { - *r = 0; - *q = q1; - } - return; -} - -static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem) -{ - u_int32_t low, high; - low = *n & 0xffffffff; - high = *n >> 32; - if (high) { - u_int32_t high1 = high % div; - u_int32_t low1 = low; - high /= div; - divl(high1, low1, div, &low, rem); - *n = (u_int64_t)high << 32 | low; - } else { - *n = low / div; - *rem = low % div; - } -} -#endif - /* * PCM library */ -- cgit v1.2.3 From ac4bcf889469ffbca88f234d3184452886a47905 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Jun 2009 14:44:52 +0200 Subject: perf_counter: Change PERF_SAMPLE_CONFIG into PERF_SAMPLE_ID The purpose of PERF_SAMPLE_CONFIG was to identify the counters, since then we've added counter ids, use those instead. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 40dc0e273d9c..9cea32a0655d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -104,7 +104,7 @@ enum perf_counter_sample_format { PERF_SAMPLE_ADDR = 1U << 3, PERF_SAMPLE_GROUP = 1U << 4, PERF_SAMPLE_CALLCHAIN = 1U << 5, - PERF_SAMPLE_CONFIG = 1U << 6, + PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, }; -- cgit v1.2.3 From 689802b2d0536e72281dc959ab9cb34fb3c304cf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Jun 2009 15:05:43 +0200 Subject: perf_counter: Add PERF_SAMPLE_PERIOD In order to allow easy tracking of the period, also provide means of adding it to the sample data. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 9cea32a0655d..6bc250039738 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -106,6 +106,7 @@ enum perf_counter_sample_format { PERF_SAMPLE_CALLCHAIN = 1U << 5, PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, + PERF_SAMPLE_PERIOD = 1U << 8, }; /* @@ -260,6 +261,7 @@ enum perf_event_type { * struct { * struct perf_event_header header; * u64 time; + * u64 id; * u64 sample_period; * }; */ -- cgit v1.2.3 From 6a24ed6c6082ec65d19331a4bfa30c0512a1a822 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Jun 2009 18:01:29 +0200 Subject: perf_counter: Fix frequency adjustment for < HZ Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6bc250039738..4f9d39ecdc05 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -373,6 +373,9 @@ struct hw_perf_counter { u64 sample_period; atomic64_t period_left; u64 interrupts; + + u64 freq_count; + u64 freq_interrupts; #endif }; -- cgit v1.2.3 From 2ac6bf4ddc87c3b6b609f8fa82f6ebbffeac12f4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 5 Jun 2009 10:36:24 -0700 Subject: IB/mlx4: Add strong ordering to local inval and fast reg work requests The ConnectX Programmer's Reference Manual states that the "SO" bit must be set when posting Fast Register and Local Invalidate send work requests. When this bit is set, the work request will be executed only after all previous work requests on the send queue have been executed. (If the bit is not set, Fast Register and Local Invalidate WQEs may begin execution too early, which violates the defined semantics for these operations) This fixes the issue with NFS/RDMA reported in Signed-off-by: Jack Morgenstein Cc: Signed-off-by: Roland Dreier --- include/linux/mlx4/qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index bf8f11982dae..9f29d86e5dc9 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -165,6 +165,7 @@ enum { MLX4_WQE_CTRL_IP_CSUM = 1 << 4, MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, MLX4_WQE_CTRL_INS_VLAN = 1 << 6, + MLX4_WQE_CTRL_STRONG_ORDER = 1 << 7, }; struct mlx4_wqe_ctrl_seg { -- cgit v1.2.3 From a21ca2cac582886a3e95c8bb84ff7c52d4d15e54 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Jun 2009 09:58:57 +0200 Subject: perf_counter: Separate out attr->type from attr->config Counter type is a frequently used value and we do a lot of bit juggling by encoding and decoding it from attr->config. Clean this up by creating a separate attr->type field. Also clean up the various similarly complex user-space bits all around counter attribute management. The net improvement is significant, and it will be easier to add a new major type (which is what triggered this cleanup). (This changes the ABI, all tools are adapted.) (PowerPC build-tested.) Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 65 +++++++++----------------------------------- 1 file changed, 13 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4f9d39ecdc05..f794c69b34c9 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -73,26 +73,6 @@ enum sw_event_ids { PERF_SW_EVENTS_MAX = 7, }; -#define __PERF_COUNTER_MASK(name) \ - (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ - PERF_COUNTER_##name##_SHIFT) - -#define PERF_COUNTER_RAW_BITS 1 -#define PERF_COUNTER_RAW_SHIFT 63 -#define PERF_COUNTER_RAW_MASK __PERF_COUNTER_MASK(RAW) - -#define PERF_COUNTER_CONFIG_BITS 63 -#define PERF_COUNTER_CONFIG_SHIFT 0 -#define PERF_COUNTER_CONFIG_MASK __PERF_COUNTER_MASK(CONFIG) - -#define PERF_COUNTER_TYPE_BITS 7 -#define PERF_COUNTER_TYPE_SHIFT 56 -#define PERF_COUNTER_TYPE_MASK __PERF_COUNTER_MASK(TYPE) - -#define PERF_COUNTER_EVENT_BITS 56 -#define PERF_COUNTER_EVENT_SHIFT 0 -#define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) - /* * Bits that can be set in attr.sample_type to request information * in the overflow packets. @@ -125,10 +105,13 @@ enum perf_counter_read_format { */ struct perf_counter_attr { /* - * The MSB of the config word signifies if the rest contains cpu - * specific (raw) counter configuration data, if unset, the next - * 7 bits are an event type and the rest of the bits are the event - * identifier. + * Major type: hardware/software/tracepoint/etc. + */ + __u32 type; + __u32 __reserved_1; + + /* + * Type specific configuration information. */ __u64 config; @@ -152,12 +135,11 @@ struct perf_counter_attr { comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ - __reserved_1 : 53; + __reserved_2 : 53; __u32 wakeup_events; /* wakeup every n events */ - __u32 __reserved_2; + __u32 __reserved_3; - __u64 __reserved_3; __u64 __reserved_4; }; @@ -278,8 +260,8 @@ enum perf_event_type { /* * struct { - * struct perf_event_header header; - * u32 pid, ppid; + * struct perf_event_header header; + * u32 pid, ppid; * }; */ PERF_EVENT_FORK = 7, @@ -331,27 +313,6 @@ enum perf_event_type { struct task_struct; -static inline u64 perf_event_raw(struct perf_counter_attr *attr) -{ - return attr->config & PERF_COUNTER_RAW_MASK; -} - -static inline u64 perf_event_config(struct perf_counter_attr *attr) -{ - return attr->config & PERF_COUNTER_CONFIG_MASK; -} - -static inline u64 perf_event_type(struct perf_counter_attr *attr) -{ - return (attr->config & PERF_COUNTER_TYPE_MASK) >> - PERF_COUNTER_TYPE_SHIFT; -} - -static inline u64 perf_event_id(struct perf_counter_attr *attr) -{ - return attr->config & PERF_COUNTER_EVENT_MASK; -} - /** * struct hw_perf_counter - performance counter hardware details: */ @@ -616,8 +577,8 @@ extern int perf_counter_overflow(struct perf_counter *counter, */ static inline int is_software_counter(struct perf_counter *counter) { - return !perf_event_raw(&counter->attr) && - perf_event_type(&counter->attr) != PERF_TYPE_HARDWARE; + return (counter->attr.type != PERF_TYPE_RAW) && + (counter->attr.type != PERF_TYPE_HARDWARE); } extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); -- cgit v1.2.3 From 8326f44da090d6d304d29b9fdc7fb3e20889e329 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 5 Jun 2009 20:22:46 +0200 Subject: perf_counter: Implement generalized cache event types Extend generic event enumeration with the PERF_TYPE_HW_CACHE method. This is a 3-dimensional space: { L1-D, L1-I, L2, ITLB, DTLB, BPU } x { load, store, prefetch } x { accesses, misses } User-space passes in the 3 coordinates and the kernel provides a counter. (if the hardware supports that type and if the combination makes sense.) Combinations that make no sense produce a -EINVAL. Combinations that are not supported by the hardware produce -ENOTSUP. Extend the tools to deal with this, and rewrite the event symbol parsing code with various popular aliases for the units and access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are both valid aliases. ( x86 is supported for now, with the Nehalem event table filled in, and with Core2 and Atom having placeholder tables. ) Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f794c69b34c9..3586df840f69 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -28,6 +28,7 @@ enum perf_event_types { PERF_TYPE_HARDWARE = 0, PERF_TYPE_SOFTWARE = 1, PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, /* * available TYPE space, raw is the max value. @@ -55,6 +56,39 @@ enum attr_ids { PERF_HW_EVENTS_MAX = 7, }; +/* + * Generalized hardware cache counters: + * + * { L1-D, L1-I, L2, LLC, ITLB, DTLB, BPU } x + * { read, write, prefetch } x + * { accesses, misses } + */ +enum hw_cache_id { + PERF_COUNT_HW_CACHE_L1D, + PERF_COUNT_HW_CACHE_L1I, + PERF_COUNT_HW_CACHE_L2, + PERF_COUNT_HW_CACHE_DTLB, + PERF_COUNT_HW_CACHE_ITLB, + PERF_COUNT_HW_CACHE_BPU, + + PERF_COUNT_HW_CACHE_MAX, +}; + +enum hw_cache_op_id { + PERF_COUNT_HW_CACHE_OP_READ, + PERF_COUNT_HW_CACHE_OP_WRITE, + PERF_COUNT_HW_CACHE_OP_PREFETCH, + + PERF_COUNT_HW_CACHE_OP_MAX, +}; + +enum hw_cache_op_result_id { + PERF_COUNT_HW_CACHE_RESULT_ACCESS, + PERF_COUNT_HW_CACHE_RESULT_MISS, + + PERF_COUNT_HW_CACHE_RESULT_MAX, +}; + /* * Special "software" counters provided by the kernel, even if the hardware * does not support performance counters. These counters measure various -- cgit v1.2.3 From db429e9ec0f9dee2d8e50c154f04f29f880fc9d6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 7 Jun 2009 13:52:52 +0200 Subject: partitions: add ->set_capacity block device method * Add ->set_capacity block device method and use it in rescan_partitions() to attempt enabling native capacity of the device upon detecting the partition which exceeds device capacity. * Add GENHD_FL_NATIVE_CAPACITY flag to try limit attempts of enabling native capacity during partition scan. Together with the consecutive patch implementing ->set_capacity method in ide-gd device driver this allows automatic disabling of Host Protected Area (HPA) if any partitions overlapping HPA are detected. Cc: Robert Hancock Cc: Frans Pop Cc: "Andries E. Brouwer" Acked-by: Al Viro Emphatically-Acked-by: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/blkdev.h | 2 ++ include/linux/genhd.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6f841fb1be30..a2d7298be351 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1106,6 +1106,8 @@ struct block_device_operations { int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); int (*media_changed) (struct gendisk *); + unsigned long long (*set_capacity) (struct gendisk *, + unsigned long long); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); struct module *owner; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 634c53028fb8..239e24b081a9 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -113,6 +113,7 @@ struct hd_struct { #define GENHD_FL_UP 16 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ +#define GENHD_FL_NATIVE_CAPACITY 128 #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) -- cgit v1.2.3 From e957b60d1583022a0f7c03267d37fcae2ddb78b1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 7 Jun 2009 13:52:52 +0200 Subject: ide-gd: implement block device ->set_capacity method (v2) * Use ->probed_capacity to store native device capacity for ATA disks. * Add ->set_capacity method to struct ide_disk_ops. * Implement disk device ->set_capacity method for ATA disks. * Implement block device ->set_capacity method. v2: * Check if LBA and HPA are supported in ide_disk_set_capacity(). * According to the spec the SET MAX ADDRESS command shall be immediately preceded by a READ NATIVE MAX ADDRESS command. * Add ide_disk_hpa_{get_native,set}_capacity() helpers. Together with the previous patch adding ->set_capacity block device method this allows automatic disabling of Host Protected Area (HPA) if any partitions overlapping HPA are detected. Cc: Robert Hancock Cc: Frans Pop Cc: "Andries E. Brouwer" Cc: Al Viro Emphatically-Acked-by: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 9fed365a598b..e96ace12872a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -397,6 +397,7 @@ struct ide_drive_s; struct ide_disk_ops { int (*check)(struct ide_drive_s *, const char *); int (*get_capacity)(struct ide_drive_s *); + u64 (*set_capacity)(struct ide_drive_s *, u64); void (*setup)(struct ide_drive_s *); void (*flush)(struct ide_drive_s *); int (*init_media)(struct ide_drive_s *, struct gendisk *); @@ -568,8 +569,7 @@ struct ide_drive_s { unsigned int drive_data; /* used by set_pio_mode/dev_select() */ unsigned int failures; /* current failure count */ unsigned int max_failures; /* maximum allowed failure count */ - u64 probed_capacity;/* initial reported media capacity (ide-cd only currently) */ - + u64 probed_capacity;/* initial/native media capacity */ u64 capacity64; /* total number of sectors */ int lun; /* logical unit */ -- cgit v1.2.3 From 075affcbe01d4d7cefcd0e30a98df1253bcf8d92 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 7 Jun 2009 13:52:52 +0200 Subject: ide: preserve Host Protected Area by default (v2) From the perspective of most users of recent systems, disabling Host Protected Area (HPA) can break vendor RAID formats, GPT partitions and risks corrupting firmware or overwriting vendor system recovery tools. Unfortunately the original (kernels < 2.6.30) behavior (unconditionally disabling HPA and using full disk capacity) was introduced at the time when the main use of HPA was to make the drive look small enough for the BIOS to allow the system to boot with large capacity drives. Thus to allow the maximum compatibility with the existing setups (using HPA and partitioned with HPA disabled) we automically disable HPA if any partitions overlapping HPA are detected. Additionally HPA can also be disabled using the "nohpa" module parameter (i.e. "ide_core.nohpa=0.0" to disable HPA on /dev/hda). v2: Fix ->resume HPA support. While at it: - remove stale "idebus=" entry from Documentation/kernel-parameters.txt Cc: Robert Hancock Cc: Frans Pop Cc: "Andries E. Brouwer" Cc: Al Viro Acked-by: Sergei Shtylyov [patch description was based on input from Alan Cox and Frans Pop] Emphatically-Acked-by: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index e96ace12872a..45dce3b4c88c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -475,6 +475,8 @@ enum { IDE_DFLAG_NICE1 = (1 << 5), /* device is physically present */ IDE_DFLAG_PRESENT = (1 << 6), + /* disable Host Protected Area */ + IDE_DFLAG_NOHPA = (1 << 7), /* id read from device (synthetic if not set) */ IDE_DFLAG_ID_READ = (1 << 8), IDE_DFLAG_NOPROBE = (1 << 9), -- cgit v1.2.3 From 8bc1e5aa06a2a9a425c4a6795fc564cba1521487 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 7 Jun 2009 15:37:09 +0200 Subject: ide: respect quirk_drives[] list on all controllers * Add ide_check_nien_quirk_list() helper to the core code and then use it in ide_port_tune_devices(). * Remove no longer needed ->quirkproc methods from hpt366.c and pdc202xx_{new,old}.c. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index c8f7b9673710..6caaae0c7743 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1453,6 +1453,7 @@ static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} void ide_register_region(struct gendisk *); void ide_unregister_region(struct gendisk *); +void ide_check_nien_quirk_list(ide_drive_t *); void ide_undecoded_slave(ide_drive_t *); void ide_port_apply_params(ide_hwif_t *); -- cgit v1.2.3 From 734affdcae20af4fec95e46a64fb29f063a15c19 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 7 Jun 2009 15:37:10 +0200 Subject: ide: add IDE_DFLAG_NIEN_QUIRK device flag Add IDE_DFLAG_NIEN_QUIRK device flag and use it instead of drive->quirk_list. There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 6caaae0c7743..a6c6a2fad7c8 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -499,6 +499,7 @@ enum { /* write protect */ IDE_DFLAG_WP = (1 << 29), IDE_DFLAG_FORMAT_IN_PROGRESS = (1 << 30), + IDE_DFLAG_NIEN_QUIRK = (1 << 31), }; struct ide_drive_s { @@ -530,7 +531,6 @@ struct ide_drive_s { u8 waiting_for_dma; /* dma currently in progress */ u8 dma; /* atapi dma flag */ - u8 quirk_list; /* considered quirky, set for a specific host */ u8 init_speed; /* transfer rate set at boot */ u8 current_speed; /* current transfer rate set */ u8 desired_speed; /* desired transfer rate set */ -- cgit v1.2.3 From aa853f85d9ed593672d0f24a98c72a2518cb63e6 Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Sat, 6 Jun 2009 10:17:57 +0100 Subject: [ARM] 5543/1: arm: serial amba: add missing declaration in serial.h This header is sometimes included in the uncompress stage to get register values, but no can be included there. So declare "struct amba_device" here before using it in a prototype. Signed-off-by: Alessandro Rubini Acked-by: Andrea Gallo Signed-off-by: Russell King --- include/linux/amba/serial.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 48ee32a18ac5..64a982ea5d5f 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -159,6 +159,7 @@ #define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) #ifndef __ASSEMBLY__ +struct amba_device; /* in uncompress this is included but amba/bus.h is not */ struct amba_pl010_data { void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl); }; -- cgit v1.2.3 From 226c7ffe74474257b4b87bd38ae8ba0030cf65e2 Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Wed, 6 May 2009 10:52:51 -0700 Subject: [SCSI] net, libfcoe: Add the FCoE Initialization Protocol ethertype FIP is the FCoE Initialization Protocol and this patch adds the protocol ethertype to the kernel's list of ethertypes. Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Acked-by: David S. Miller Signed-off-by: James Bottomley --- include/linux/if_ether.h | 1 + include/scsi/fc/fc_fip.h | 7 ------- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index cfe4fe1b7132..60e8934d10b5 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -79,6 +79,7 @@ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_TIPC 0x88CA /* TIPC */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ +#define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ /* diff --git a/include/scsi/fc/fc_fip.h b/include/scsi/fc/fc_fip.h index 0627a9ae6347..3d138c1fcf8a 100644 --- a/include/scsi/fc/fc_fip.h +++ b/include/scsi/fc/fc_fip.h @@ -22,13 +22,6 @@ * http://www.t11.org/ftp/t11/pub/fc/bb-5/08-543v1.pdf */ -/* - * The FIP ethertype eventually goes in net/if_ether.h. - */ -#ifndef ETH_P_FIP -#define ETH_P_FIP 0x8914 /* FIP Ethertype */ -#endif - #define FIP_DEF_PRI 128 /* default selection priority */ #define FIP_DEF_FC_MAP 0x0efc00 /* default FCoE MAP (MAC OUI) value */ #define FIP_DEF_FKA 8000 /* default FCF keep-alive/advert period (mS) */ -- cgit v1.2.3 From 1f8a6a10fb9437eac3f516ea4324a19087872f30 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Jun 2009 18:18:39 +0200 Subject: ring-buffer: pass in lockdep class key for reader_lock On Sun, 7 Jun 2009, Ingo Molnar wrote: > Testing tracer sched_switch: <6>Starting ring buffer hammer > PASSED > Testing tracer sysprof: PASSED > Testing tracer function: PASSED > Testing tracer irqsoff: > ============================================= > PASSED > Testing tracer preemptoff: PASSED > Testing tracer preemptirqsoff: [ INFO: possible recursive locking detected ] > PASSED > Testing tracer branch: 2.6.30-rc8-tip-01972-ge5b9078-dirty #5760 > --------------------------------------------- > rb_consumer/431 is trying to acquire lock: > (&cpu_buffer->reader_lock){......}, at: [] ring_buffer_reset_cpu+0x37/0x70 > > but task is already holding lock: > (&cpu_buffer->reader_lock){......}, at: [] ring_buffer_consume+0x7e/0xc0 > > other info that might help us debug this: > 1 lock held by rb_consumer/431: > #0: (&cpu_buffer->reader_lock){......}, at: [] ring_buffer_consume+0x7e/0xc0 The ring buffer is a generic structure, and can be used outside of ftrace. If ftrace traces within the use of the ring buffer, it can produce false positives with lockdep. This patch passes in a static lock key into the allocation of the ring buffer, so that different ring buffers will have their own lock class. Reported-by: Ingo Molnar Signed-off-by: Peter Zijlstra LKML-Reference: <1244477919.13761.9042.camel@twins> [ store key in ring buffer descriptor ] Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index f1345828c7c5..8670f1575fe1 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -105,7 +105,19 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, * size is in bytes for each per CPU buffer. */ struct ring_buffer * -ring_buffer_alloc(unsigned long size, unsigned flags); +__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key); + +/* + * Because the ring buffer is generic, if other users of the ring buffer get + * traced by ftrace, it can produce lockdep warnings. We need to keep each + * ring buffer's lock class separate. + */ +#define ring_buffer_alloc(size, flags) \ +({ \ + static struct lock_class_key __key; \ + __ring_buffer_alloc((size), (flags), &__key); \ +}) + void ring_buffer_free(struct ring_buffer *buffer); int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); -- cgit v1.2.3 From 9df1bb9b516daeece159ab7fb262d01a0359247c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Jun 2009 06:22:57 +0200 Subject: Revert "block: Fix bounce limit setting in DM" This reverts commit a05c0205ba031c01bba33a21bf0a35920eb64833. DM doesn't need to access the bounce_pfn directly. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 989aa1790f48..5e740a135e73 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -910,7 +910,6 @@ extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); -extern void blk_queue_bounce_pfn(struct request_queue *, u64); extern void blk_queue_max_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); -- cgit v1.2.3 From 151060ac13144208bd7601d17e4c92c59b98072f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Apr 2009 10:54:54 +0900 Subject: CUSE: implement CUSE - Character device in Userspace CUSE enables implementing character devices in userspace. With recent additions of ioctl and poll support, FUSE already has most of what's necessary to implement character devices. All CUSE has to do is bonding all those components - FUSE, chardev and the driver model - nicely. When client opens /dev/cuse, kernel starts conversation with CUSE_INIT. The client tells CUSE which device it wants to create. As the previous patch made fuse_file usable without associated fuse_inode, CUSE doesn't create super block or inodes. It attaches fuse_file to cdev file->private_data during open and set ff->fi to NULL. The rest of the operation is almost identical to FUSE direct IO case. Each CUSE device has a corresponding directory /sys/class/cuse/DEVNAME (which is symlink to /sys/devices/virtual/class/DEVNAME if SYSFS_DEPRECATED is turned off) which hosts "waiting" and "abort" among other things. Those two files have the same meaning as the FUSE control files. The only notable lacking feature compared to in-kernel implementation is mmap support. Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 162e5defe683..d41ed593f79f 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -120,6 +120,13 @@ struct fuse_file_lock { #define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) +/** + * CUSE INIT request/reply flags + * + * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl + */ +#define CUSE_UNRESTRICTED_IOCTL (1 << 0) + /** * Release flags */ @@ -210,6 +217,9 @@ enum fuse_opcode { FUSE_DESTROY = 38, FUSE_IOCTL = 39, FUSE_POLL = 40, + + /* CUSE specific operations */ + CUSE_INIT = 4096, }; enum fuse_notify_code { @@ -401,6 +411,27 @@ struct fuse_init_out { __u32 max_write; }; +#define CUSE_INIT_INFO_MAX 4096 + +struct cuse_init_in { + __u32 major; + __u32 minor; + __u32 unused; + __u32 flags; +}; + +struct cuse_init_out { + __u32 major; + __u32 minor; + __u32 unused; + __u32 flags; + __u32 max_read; + __u32 max_write; + __u32 dev_major; /* chardev major */ + __u32 dev_minor; /* chardev minor */ + __u32 spare[10]; +}; + struct fuse_interrupt_in { __u64 unique; }; -- cgit v1.2.3 From 1d589bb16b825b3a7b4edd34d997f1f1f953033d Mon Sep 17 00:00:00 2001 From: john cooper Date: Tue, 9 Jun 2009 14:41:40 +0200 Subject: Add serial number support for virtio_blk, V4a This patch extracts the opaque data from pci i/o region 0 via the added VIRTIO_BLK_F_IDENTIFY field. By convention this data takes the form of that returned by an ATA IDENTIFY DEVICE command, however the driver (except for structure size) makes no interpretation of the data. The structure data is copied wholesale to userspace via a HDIO_GET_IDENTITY ioctl command (eg: hdparm -i ). Signed-off-by: john cooper Signed-off-by: Rusty Russell Signed-off-by: Jens Axboe --- include/linux/virtio_blk.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 4dbcbc1c3481..be7d255fc7cf 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -16,6 +16,9 @@ #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ #define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ +#define VIRTIO_BLK_F_IDENTIFY 8 /* ATA IDENTIFY supported */ + +#define VIRTIO_BLK_ID_BYTES (sizeof(__u16[256])) /* IDENTIFY DATA */ struct virtio_blk_config { @@ -33,6 +36,7 @@ struct virtio_blk_config } geometry; /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ __u32 blk_size; + __u8 identify[VIRTIO_BLK_ID_BYTES]; } __attribute__((packed)); /* These two define direction. */ -- cgit v1.2.3 From 0281b5dc0350cbf6dd21ed558a33cccce77abc02 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 6 Jun 2009 14:50:36 -0700 Subject: cpumask: introduce zalloc_cpumask_var So can get cpumask_var with cpumask_clear Signed-off-by: Yinghai Lu Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9f315382610b..c5ac87ca7bc6 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1022,6 +1022,8 @@ typedef struct cpumask *cpumask_var_t; bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); +bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); +bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); @@ -1040,6 +1042,19 @@ static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, return true; } +static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + cpumask_clear(*mask); + return true; +} + +static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, + int node) +{ + cpumask_clear(*mask); + return true; +} + static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } -- cgit v1.2.3 From 43514774ff40c4fbe0cbbd3d8293a359f1a9fe71 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 8 Jun 2009 18:14:41 -0700 Subject: [SCSI] iscsi class: Add new NETLINK_ISCSI messages for cnic/bnx2i driver. Add ISCSI_NETLINK messages for iSCSI NICs to get information such as path from userspace. Original iscsid messages are now always sent as multicast to group 1. The new messages are sent to group 2. The multicast changes were made by Mike Christie. Signed-off-by: Michael Chan Signed-off-by: Benjamin Li Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/iscsi_if.h | 42 +++++++++++++++++++++++++++++++++++++ include/scsi/scsi_transport_iscsi.h | 5 +++++ 2 files changed, 47 insertions(+) (limited to 'include') diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h index 2c1a4af9eafb..4426f00da5ff 100644 --- a/include/scsi/iscsi_if.h +++ b/include/scsi/iscsi_if.h @@ -22,6 +22,11 @@ #define ISCSI_IF_H #include +#include +#include + +#define ISCSI_NL_GRP_ISCSID 1 +#define ISCSI_NL_GRP_UIP 2 #define UEVENT_BASE 10 #define KEVENT_BASE 100 @@ -53,6 +58,8 @@ enum iscsi_uevent_e { ISCSI_UEVENT_CREATE_BOUND_SESSION = UEVENT_BASE + 18, ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST = UEVENT_BASE + 19, + ISCSI_UEVENT_PATH_UPDATE = UEVENT_BASE + 20, + /* up events */ ISCSI_KEVENT_RECV_PDU = KEVENT_BASE + 1, ISCSI_KEVENT_CONN_ERROR = KEVENT_BASE + 2, @@ -60,6 +67,9 @@ enum iscsi_uevent_e { ISCSI_KEVENT_DESTROY_SESSION = KEVENT_BASE + 4, ISCSI_KEVENT_UNBIND_SESSION = KEVENT_BASE + 5, ISCSI_KEVENT_CREATE_SESSION = KEVENT_BASE + 6, + + ISCSI_KEVENT_PATH_REQ = KEVENT_BASE + 7, + ISCSI_KEVENT_IF_DOWN = KEVENT_BASE + 8, }; enum iscsi_tgt_dscvr { @@ -159,6 +169,9 @@ struct iscsi_uevent { uint32_t param; /* enum iscsi_host_param */ uint32_t len; } set_host_param; + struct msg_set_path { + uint32_t host_no; + } set_path; } u; union { /* messages k -> u */ @@ -192,9 +205,38 @@ struct iscsi_uevent { struct msg_transport_connect_ret { uint64_t handle; } ep_connect_ret; + struct msg_req_path { + uint32_t host_no; + } req_path; + struct msg_notify_if_down { + uint32_t host_no; + } notify_if_down; } r; } __attribute__ ((aligned (sizeof(uint64_t)))); +/* + * To keep the struct iscsi_uevent size the same for userspace code + * compatibility, the main structure for ISCSI_UEVENT_PATH_UPDATE and + * ISCSI_KEVENT_PATH_REQ is defined separately and comes after the + * struct iscsi_uevent in the NETLINK_ISCSI message. + */ +struct iscsi_path { + uint64_t handle; + uint8_t mac_addr[6]; + uint8_t mac_addr_old[6]; + uint32_t ip_addr_len; /* 4 or 16 */ + union { + struct in_addr v4_addr; + struct in6_addr v6_addr; + } src; + union { + struct in_addr v4_addr; + struct in6_addr v6_addr; + } dst; + uint16_t vlan_id; + uint16_t pmtu; +} __attribute__ ((aligned (sizeof(uint64_t)))); + /* * Common error codes */ diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 8cb7a31d9961..349c7f30720d 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -133,6 +133,7 @@ struct iscsi_transport { void (*ep_disconnect) (struct iscsi_endpoint *ep); int (*tgt_dscvr) (struct Scsi_Host *shost, enum iscsi_tgt_dscvr type, uint32_t enable, struct sockaddr *dst_addr); + int (*set_path) (struct Scsi_Host *shost, struct iscsi_path *params); }; /* @@ -149,6 +150,10 @@ extern void iscsi_conn_error_event(struct iscsi_cls_conn *conn, extern int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size); +extern int iscsi_offload_mesg(struct Scsi_Host *shost, + struct iscsi_transport *transport, uint32_t type, + char *data, uint16_t data_size); + struct iscsi_cls_conn { struct list_head conn_list; /* item in connlist */ void *dd_data; /* LLD private data */ -- cgit v1.2.3 From 55782138e47d9baf2f7d3a7af9e7cf42adf72c56 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 9 Jun 2009 13:43:05 +0800 Subject: tracing/events: convert block trace points to TRACE_EVENT() TRACE_EVENT is a more generic way to define tracepoints. Doing so adds these new capabilities to this tracepoint: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions ... Cons: - no dev_t info for the output of plug, unplug_timer and unplug_io events. no dev_t info for getrq and sleeprq events if bio == NULL. no dev_t info for rq_abort,...,rq_requeue events if rq->rq_disk == NULL. This is mainly because we can't get the deivce from a request queue. But this may change in the future. - A packet command is converted to a string in TP_assign, not TP_print. While blktrace do the convertion just before output. Since pc requests should be rather rare, this is not a big issue. - In blktrace, an event can have 2 different print formats, but a TRACE_EVENT has a unique format, which means we have some unused data in a trace entry. The overhead is minimized by using __dynamic_array() instead of __array(). I've benchmarked the ioctl blktrace vs the splice based TRACE_EVENT tracing: dd dd + ioctl blktrace dd + TRACE_EVENT (splice) 1 7.36s, 42.7 MB/s 7.50s, 42.0 MB/s 7.41s, 42.5 MB/s 2 7.43s, 42.3 MB/s 7.48s, 42.1 MB/s 7.43s, 42.4 MB/s 3 7.38s, 42.6 MB/s 7.45s, 42.2 MB/s 7.41s, 42.5 MB/s So the overhead of tracing is very small, and no regression when using those trace events vs blktrace. And the binary output of TRACE_EVENT is much smaller than blktrace: # ls -l -h -rw-r--r-- 1 root root 8.8M 06-09 13:24 sda.blktrace.0 -rw-r--r-- 1 root root 195K 06-09 13:24 sda.blktrace.1 -rw-r--r-- 1 root root 2.7M 06-09 13:25 trace_splice.out Following are some comparisons between TRACE_EVENT and blktrace: plug: kjournald-480 [000] 303.084981: block_plug: [kjournald] kjournald-480 [000] 303.084981: 8,0 P N [kjournald] unplug_io: kblockd/0-118 [000] 300.052973: block_unplug_io: [kblockd/0] 1 kblockd/0-118 [000] 300.052974: 8,0 U N [kblockd/0] 1 remap: kjournald-480 [000] 303.085042: block_remap: 8,0 W 102736992 + 8 <- (8,8) 33384 kjournald-480 [000] 303.085043: 8,0 A W 102736992 + 8 <- (8,8) 33384 bio_backmerge: kjournald-480 [000] 303.085086: block_bio_backmerge: 8,0 W 102737032 + 8 [kjournald] kjournald-480 [000] 303.085086: 8,0 M W 102737032 + 8 [kjournald] getrq: kjournald-480 [000] 303.084974: block_getrq: 8,0 W 102736984 + 8 [kjournald] kjournald-480 [000] 303.084975: 8,0 G W 102736984 + 8 [kjournald] bash-2066 [001] 1072.953770: 8,0 G N [bash] bash-2066 [001] 1072.953773: block_getrq: 0,0 N 0 + 0 [bash] rq_complete: konsole-2065 [001] 300.053184: block_rq_complete: 8,0 W () 103669040 + 16 [0] konsole-2065 [001] 300.053191: 8,0 C W 103669040 + 16 [0] ksoftirqd/1-7 [001] 1072.953811: 8,0 C N (5a 00 08 00 00 00 00 00 24 00) [0] ksoftirqd/1-7 [001] 1072.953813: block_rq_complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0] rq_insert: kjournald-480 [000] 303.084985: block_rq_insert: 8,0 W 0 () 102736984 + 8 [kjournald] kjournald-480 [000] 303.084986: 8,0 I W 102736984 + 8 [kjournald] Changelog from v2 -> v3: - use the newly introduced __dynamic_array(). Changelog from v1 -> v2: - use __string() instead of __array() to minimize the memory required to store hex dump of rq->cmd(). - support large pc requests. - add missing blk_fill_rwbs_rq() in block_rq_requeue TRACE_EVENT. - some cleanups. Signed-off-by: Li Zefan LKML-Reference: <4A2DF669.5070905@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/blktrace_api.h | 13 ++ include/trace/block.h | 76 ------- include/trace/events/block.h | 483 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 496 insertions(+), 76 deletions(-) delete mode 100644 include/trace/block.h create mode 100644 include/trace/events/block.h (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 82b4636030e9..c7ec31dd04c9 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -218,5 +218,18 @@ static inline int blk_trace_init_sysfs(struct device *dev) #endif /* CONFIG_BLK_DEV_IO_TRACE */ +#ifdef CONFIG_EVENT_TRACING + +static inline int blk_cmd_buf_len(struct request *rq) +{ + return blk_pc_request(rq) ? rq->cmd_len * 3 : 1; +} + +extern void blk_dump_cmd(char *buf, struct request *rq); +extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); +extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq); + +#endif /* CONFIG_EVENT_TRACING */ + #endif /* __KERNEL__ */ #endif diff --git a/include/trace/block.h b/include/trace/block.h deleted file mode 100644 index 5b12efa096b6..000000000000 --- a/include/trace/block.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _TRACE_BLOCK_H -#define _TRACE_BLOCK_H - -#include -#include - -DECLARE_TRACE(block_rq_abort, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_insert, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_issue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_requeue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_complete, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_bio_bounce, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_backmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_frontmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_queue, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_getrq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); - -DECLARE_TRACE(block_sleeprq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); - -DECLARE_TRACE(block_plug, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_unplug_timer, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_unplug_io, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_split, - TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TP_ARGS(q, bio, pdu)); - -DECLARE_TRACE(block_remap, - TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from), - TP_ARGS(q, bio, dev, from)); - -#endif diff --git a/include/trace/events/block.h b/include/trace/events/block.h new file mode 100644 index 000000000000..a99d1e565bb0 --- /dev/null +++ b/include/trace/events/block.h @@ -0,0 +1,483 @@ +#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BLOCK_H + +#include +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM block + +TRACE_EVENT(block_rq_abort, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->errors) +); + +TRACE_EVENT(block_rq_insert, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( unsigned int, bytes ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __entry->bytes, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_rq_issue, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( unsigned int, bytes ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __entry->bytes, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_rq_requeue, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->errors) +); + +TRACE_EVENT(block_rq_complete, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->errors) +); +TRACE_EVENT(block_bio_bounce, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_complete, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned, nr_sector ) + __field( int, error ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + ), + + TP_printk("%d,%d %s %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->error) +); + +TRACE_EVENT(block_bio_backmerge, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_frontmerge, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_queue, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_getrq, + + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + + TP_ARGS(q, bio, rw), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->sector = bio ? bio->bi_sector : 0; + __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + blk_fill_rwbs(__entry->rwbs, + bio ? bio->bi_rw : 0, __entry->nr_sector); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_sleeprq, + + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + + TP_ARGS(q, bio, rw), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->sector = bio ? bio->bi_sector : 0; + __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + blk_fill_rwbs(__entry->rwbs, + bio ? bio->bi_rw : 0, __entry->nr_sector); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_plug, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s]", __entry->comm) +); + +TRACE_EVENT(block_unplug_timer, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __field( int, nr_rq ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) +); + +TRACE_EVENT(block_unplug_io, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __field( int, nr_rq ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) +); + +TRACE_EVENT(block_split, + + TP_PROTO(struct request_queue *q, struct bio *bio, + unsigned int new_sector), + + TP_ARGS(q, bio, new_sector), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( sector_t, new_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->new_sector = new_sector; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu / %llu [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->new_sector, __entry->comm) +); + +TRACE_EVENT(block_remap, + + TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, + sector_t from), + + TP_ARGS(q, bio, dev, from), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( dev_t, old_dev ) + __field( sector_t, old_sector ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + __entry->old_dev = dev; + __entry->old_sector = from; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + ), + + TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, + MAJOR(__entry->old_dev), MINOR(__entry->old_dev), + __entry->old_sector) +); + +#endif /* _TRACE_BLOCK_H */ + +/* This part must be outside protection */ +#include + -- cgit v1.2.3 From 80a538e49daddbf3bf783f3464e91bd3181957b2 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 8 Jun 2009 14:40:20 +0800 Subject: drm/i915: Enable probe on new chipset Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- include/drm/drm_pciids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index fc55db780199..8b4c80cdd277 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -536,4 +536,6 @@ {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0x0042, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0x0046, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0, 0, 0} -- cgit v1.2.3 From 6556d1df88fe68f9836beeb43342a336691cb67c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 9 Jun 2009 14:04:26 -0400 Subject: tracing: fix the block trace points print size The sector field is either u64 or unsigned long depending on the arch. This patch casts the sector to unsigned long long to prevent the printf warnings. [ Impact: remove compile warnings ] Signed-off-by: Steven Rostedt --- include/trace/events/block.h | 45 +++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index a99d1e565bb0..53effd496a50 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -37,7 +37,8 @@ TRACE_EVENT(block_rq_abort, TP_printk("%d,%d %s (%s) %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), - __entry->sector, __entry->nr_sector, __entry->errors) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) ); TRACE_EVENT(block_rq_insert, @@ -71,7 +72,8 @@ TRACE_EVENT(block_rq_insert, TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __entry->bytes, __get_str(cmd), - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_rq_issue, @@ -105,7 +107,8 @@ TRACE_EVENT(block_rq_issue, TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __entry->bytes, __get_str(cmd), - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_rq_requeue, @@ -137,7 +140,8 @@ TRACE_EVENT(block_rq_requeue, TP_printk("%d,%d %s (%s) %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), - __entry->sector, __entry->nr_sector, __entry->errors) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) ); TRACE_EVENT(block_rq_complete, @@ -169,7 +173,8 @@ TRACE_EVENT(block_rq_complete, TP_printk("%d,%d %s (%s) %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), - __entry->sector, __entry->nr_sector, __entry->errors) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) ); TRACE_EVENT(block_bio_bounce, @@ -195,7 +200,8 @@ TRACE_EVENT(block_bio_bounce, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_bio_complete, @@ -221,7 +227,8 @@ TRACE_EVENT(block_bio_complete, TP_printk("%d,%d %s %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->error) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->error) ); TRACE_EVENT(block_bio_backmerge, @@ -248,7 +255,8 @@ TRACE_EVENT(block_bio_backmerge, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_bio_frontmerge, @@ -275,7 +283,8 @@ TRACE_EVENT(block_bio_frontmerge, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_bio_queue, @@ -302,7 +311,8 @@ TRACE_EVENT(block_bio_queue, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_getrq, @@ -330,7 +340,8 @@ TRACE_EVENT(block_getrq, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_sleeprq, @@ -358,7 +369,8 @@ TRACE_EVENT(block_sleeprq, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_plug, @@ -441,7 +453,9 @@ TRACE_EVENT(block_split, TP_printk("%d,%d %s %llu / %llu [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->new_sector, __entry->comm) + (unsigned long long)__entry->sector, + (unsigned long long)__entry->new_sector, + __entry->comm) ); TRACE_EVENT(block_remap, @@ -471,9 +485,10 @@ TRACE_EVENT(block_remap, TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, + (unsigned long long)__entry->sector, + __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), - __entry->old_sector) + (unsigned long long)__entry->old_sector) ); #endif /* _TRACE_BLOCK_H */ -- cgit v1.2.3 From 725c624a58a10ef90a2ff889e122158fabf36147 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 8 Jun 2009 19:09:45 -0400 Subject: tracing: add trace_seq_vprint interface The code to update the print formats for events requires a vprintf format in the trace_seq. This patch adds that interface. Signed-off-by: Steven Rostedt --- include/linux/trace_seq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index ba9627f00d3f..c68bccba2074 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -27,6 +27,8 @@ trace_seq_init(struct trace_seq *s) #ifdef CONFIG_TRACING extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); +extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) + __attribute__ ((format (printf, 2, 0))); extern int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); -- cgit v1.2.3 From fd6c3a8dc44329d3aff9a578b5120982f63711ee Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 10:58:33 +0000 Subject: initconst adjustments - add .init.rodata to INIT_DATA, and group all initconst flavors together - move strings generated from __setup_param() into .init.rodata - add .*init.rodata to modpost's sets of init sections - make modpost warn about references between meminit and cpuinit as well as memexit and cpuexit sections (as CPU and memory hotplug are independently selectable features) Signed-off-by: Jan Beulich Signed-off-by: Sam Ravnborg --- include/asm-generic/vmlinux.lds.h | 5 +++-- include/linux/init.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 89853bcd27a6..3edb11499743 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -336,10 +336,11 @@ #define INIT_DATA \ *(.init.data) \ DEV_DISCARD(init.data) \ - DEV_DISCARD(init.rodata) \ CPU_DISCARD(init.data) \ - CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.data) \ + *(.init.rodata) \ + DEV_DISCARD(init.rodata) \ + CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.rodata) #define INIT_TEXT \ diff --git a/include/linux/init.h b/include/linux/init.h index 0e06c176f185..9f70c9f25d4b 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -225,7 +225,8 @@ struct obs_kernel_param { * obs_kernel_param "array" too far apart in .init.setup. */ #define __setup_param(str, unique_id, fn, early) \ - static char __setup_str_##unique_id[] __initdata __aligned(1) = str; \ + static const char __setup_str_##unique_id[] __initconst \ + __aligned(1) = str; \ static struct obs_kernel_param __setup_##unique_id \ __used __section(.init.setup) \ __attribute__((aligned((sizeof(long))))) \ -- cgit v1.2.3 From ef53dae8658cf0e93d380983824a661067948d87 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 7 Jun 2009 20:46:37 +0200 Subject: Improve vmlinux.lds.h support for arch specific linker scripts To support alingment of the individual architecture specific linker scripts provide a set of general definitions in vmlinux.lds.h With these definitions applied the diverse linekr scripts can be reduced in line count and their readability are improved - IMO. A sample linker script is included to give the preferred order of the sections for the architectures that do not have any special requirments. These definitions are also a first step towards eventual support for -ffunction-sections. The definitions makes it much easier to do a global renaming of section names - but the main purpose is to clean up the linker scripts. Tim Aboot has provided a lot of inputs to improve the definitions - all faults are mine. Signed-off-by: Sam Ravnborg Cc: Tim Abbott --- include/asm-generic/vmlinux.lds.h | 231 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 224 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 3edb11499743..fba42236e942 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1,4 +1,58 @@ -#include +/* + * Helper macros to support writing architecture specific + * linker scripts. + * + * A minimal linker scripts has following content: + * [This is a sample, architectures may have special requiriements] + * + * OUTPUT_FORMAT(...) + * OUTPUT_ARCH(...) + * ENTRY(...) + * SECTIONS + * { + * . = START; + * __init_begin = .; + * HEAD_SECTION + * INIT_TEXT_SECTION(PAGE_SIZE) + * INIT_DATA_SECTION(...) + * PERCPU(PAGE_SIZE) + * __init_end = .; + * + * _stext = .; + * TEXT_SECTION = 0 + * _etext = .; + * + * _sdata = .; + * RO_DATA_SECTION(PAGE_SIZE) + * RW_DATA_SECTION(...) + * _edata = .; + * + * EXCEPTION_TABLE(...) + * NOTES + * + * __bss_start = .; + * BSS_SECTION(0, 0) + * __bss_stop = .; + * _end = .; + * + * /DISCARD/ : { + * EXIT_TEXT + * EXIT_DATA + * *(.exitcall.exit) + * } + * STABS_DEBUG + * DWARF_DEBUG + * } + * + * [__init_begin, __init_end] is the init section that may be freed after init + * [_stext, _etext] is the text section + * [_sdata, _edata] is the data section + * + * Some of the included output section have their own set of constants. + * Examples are: [__initramfs_start, __initramfs_end] for initramfs and + * [__nosave_begin, __nosave_end] for the nosave data + */ + #include #ifndef LOAD_OFFSET #define LOAD_OFFSET 0 @@ -116,7 +170,36 @@ FTRACE_EVENTS() \ TRACE_SYSCALLS() -#define RO_DATA(align) \ +/* + * Data section helpers + */ +#define NOSAVE_DATA \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__nosave_begin) = .; \ + *(.data.nosave) \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__nosave_end) = .; + +#define PAGE_ALIGNED_DATA(page_align) \ + . = ALIGN(page_align); \ + *(.data.page_aligned) + +#define READ_MOSTLY_DATA(align) \ + . = ALIGN(align); \ + *(.data.read_mostly) + +#define CACHELINE_ALIGNED_DATA(align) \ + . = ALIGN(align); \ + *(.data.cacheline_aligned) + +#define INIT_TASK(align) \ + . = ALIGN(align); \ + *(.data.init_task) + +/* + * Read only Data + */ +#define RO_DATA_SECTION(align) \ . = ALIGN((align)); \ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_rodata) = .; \ @@ -270,9 +353,10 @@ } \ . = ALIGN((align)); -/* RODATA provided for backward compatibility. +/* RODATA & RO_DATA provided for backward compatibility. * All archs are supposed to use RO_DATA() */ -#define RODATA RO_DATA(4096) +#define RODATA RO_DATA_SECTION(4096) +#define RO_DATA(align) RO_DATA_SECTION(align) #define SECURITY_INIT \ .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \ @@ -332,6 +416,31 @@ /* Section used for early init (in .S files) */ #define HEAD_TEXT *(HEAD_TEXT_SECTION) +#define HEAD_SECTION \ + .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { \ + HEAD_TEXT \ + } + +/* + * Exception table + */ +#define EXCEPTION_TABLE(align) \ + . = ALIGN(align); \ + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start___ex_table) = .; \ + *(__ex_table) \ + VMLINUX_SYMBOL(__stop___ex_table) = .; \ + } + +/* + * Init task + */ +#define INIT_TASK_DATA(align) \ + . = ALIGN(align); \ + .data.init_task : { \ + INIT_TASK \ + } + /* init and exit section handling */ #define INIT_DATA \ *(.init.data) \ @@ -364,9 +473,32 @@ CPU_DISCARD(exit.text) \ MEM_DISCARD(exit.text) - /* DWARF debug sections. - Symbols in the DWARF debugging sections are relative to - the beginning of the section so we begin them at 0. */ +/* + * bss (Block Started by Symbol) - uninitialized data + * zeroed during startup + */ +#define SBSS \ + .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) { \ + *(.sbss) \ + *(.scommon) \ + } + +#define BSS(bss_align) \ + . = ALIGN(bss_align); \ + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__bss_start) = .; \ + *(.bss.page_aligned) \ + *(.dynbss) \ + *(.bss) \ + *(COMMON) \ + VMLINUX_SYMBOL(__bss_stop) = .; \ + } + +/* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to + * the beginning of the section so we begin them at 0. + */ #define DWARF_DEBUG \ /* DWARF 1 */ \ .debug 0 : { *(.debug) } \ @@ -433,6 +565,12 @@ VMLINUX_SYMBOL(__stop_notes) = .; \ } +#define INIT_SETUP(initsetup_align) \ + . = ALIGN(initsetup_align); \ + VMLINUX_SYMBOL(__setup_start) = .; \ + *(.init.setup) \ + VMLINUX_SYMBOL(__setup_end) = .; + #define INITCALLS \ *(.initcallearly.init) \ VMLINUX_SYMBOL(__early_initcall_end) = .; \ @@ -454,6 +592,31 @@ *(.initcall7.init) \ *(.initcall7s.init) +#define INIT_CALLS \ + VMLINUX_SYMBOL(__initcall_start) = .; \ + INITCALLS \ + VMLINUX_SYMBOL(__initcall_end) = .; + +#define CON_INITCALL \ + VMLINUX_SYMBOL(__con_initcall_start) = .; \ + *(.con_initcall.init) \ + VMLINUX_SYMBOL(__con_initcall_end) = .; + +#define SECURITY_INITCALL \ + VMLINUX_SYMBOL(__security_initcall_start) = .; \ + *(.security_initcall.init) \ + VMLINUX_SYMBOL(__security_initcall_end) = .; + +#ifdef CONFIG_BLK_DEV_INITRD +#define INIT_RAM_FS \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__initramfs_start) = .; \ + *(.init.ramfs) \ + VMLINUX_SYMBOL(__initramfs_end) = .; +#else +#define INITRAMFS +#endif + /** * PERCPU_VADDR - define output section for percpu area * @vaddr: explicit base address (optional) @@ -510,3 +673,57 @@ *(.data.percpu.shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } + + +/* + * Definition of the high level *_SECTION macros + * They will fit only a subset of the architectures + */ + + +/* + * Writeable data. + * All sections are combined in a single .data section. + * The sections following CONSTRUCTORS are arranged so their + * typical alignment matches. + * A cacheline is typical/always less than a PAGE_SIZE so + * the sections that has this restriction (or similar) + * is located before the ones requiring PAGE_SIZE alignment. + * NOSAVE_DATA starts and ends with a PAGE_SIZE alignment which + * matches the requirment of PAGE_ALIGNED_DATA. + * +/* use 0 as page_align if page_aligned data is not used */ +#define RW_DATA_SECTION(cacheline, nosave, pagealigned, inittask) \ + . = ALIGN(PAGE_SIZE); \ + .data : AT(ADDR(.data) - LOAD_OFFSET) { \ + INIT_TASK(inittask) \ + CACHELINE_ALIGNED_DATA(cacheline) \ + READ_MOSTLY_DATA(cacheline) \ + DATA_DATA \ + CONSTRUCTORS \ + NOSAVE_DATA(nosave) \ + PAGE_ALIGNED_DATA(pagealigned) \ + } + +#define INIT_TEXT_SECTION(inittext_align) \ + . = ALIGN(inittext_align); \ + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(_sinittext) = .; \ + INIT_TEXT \ + VMLINUX_SYMBOL(_einittext) = .; \ + } + +#define INIT_DATA_SECTION(initsetup_align) \ + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { \ + INIT_DATA \ + INIT_SETUP(initsetup_align) \ + INIT_CALLS \ + CON_INITCALL \ + SECURITY_INITCALL \ + INIT_RAM_FS \ + } + +#define BSS_SECTION(sbss_align, bss_align) \ + SBSS \ + BSS(bss_align) \ + . = ALIGN(4); \ -- cgit v1.2.3 From cf9e4e15e8f6306b2559979269ead7c02e6b2b95 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 11 Feb 2009 16:03:36 +0800 Subject: KVM: Split IOAPIC structure Prepared for reuse ioapic_redir_entry for MSI. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm_types.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 2b8318c83e53..b84aca3c4ad1 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -40,4 +40,21 @@ typedef unsigned long hfn_t; typedef hfn_t pfn_t; +union kvm_ioapic_redirect_entry { + u64 bits; + struct { + u8 vector; + u8 delivery_mode:3; + u8 dest_mode:1; + u8 delivery_status:1; + u8 polarity:1; + u8 remote_irr:1; + u8 trig_mode:1; + u8 mask:1; + u8 reserve:7; + u8 reserved[4]; + u8 dest_id; + } fields; +}; + #endif /* __KVM_TYPES_H__ */ -- cgit v1.2.3 From 116191b69b608d0f1513e3abe71d6a46800f2bd6 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 11 Feb 2009 16:03:37 +0800 Subject: KVM: Unify the delivery of IOAPIC and MSI interrupts Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 894a56e365e8..1a2f98fbecea 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -352,6 +352,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); +void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, + union kvm_ioapic_redirect_entry *entry, + unsigned long *deliver_bitmask); int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, -- cgit v1.2.3 From c1e01514296e8a4a43ff0c88dcff635cb90feb5f Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 25 Feb 2009 17:22:26 +0800 Subject: KVM: Ioctls for init MSI-X entry Introduce KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY two ioctls. This two ioctls are used by userspace to specific guest device MSI-X entry number and correlate MSI-X entry with GSI during the initialization stage. MSI-X should be well initialzed before enabling. Don't support change MSI-X entry number for now. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 18 ++++++++++++++++++ include/linux/kvm_host.h | 10 ++++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8cc137911b34..78cdee8c6355 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -487,6 +487,10 @@ struct kvm_irq_routing { #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ struct kvm_assigned_pci_dev) +#define KVM_ASSIGN_SET_MSIX_NR \ + _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) +#define KVM_ASSIGN_SET_MSIX_ENTRY \ + _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) /* * ioctls for vcpu fds @@ -607,4 +611,18 @@ struct kvm_assigned_irq { #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) +struct kvm_assigned_msix_nr { + __u32 assigned_dev_id; + __u16 entry_nr; + __u16 padding; +}; + +#define KVM_MAX_MSIX_PER_DEV 512 +struct kvm_assigned_msix_entry { + __u32 assigned_dev_id; + __u32 gsi; + __u16 entry; /* The index of entry in the MSI-X table */ + __u16 padding[3]; +}; + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1a2f98fbecea..432edc27e82b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -319,6 +319,12 @@ struct kvm_irq_ack_notifier { void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; +struct kvm_guest_msix_entry { + u32 vector; + u16 entry; + u16 flags; +}; + struct kvm_assigned_dev_kernel { struct kvm_irq_ack_notifier ack_notifier; struct work_struct interrupt_work; @@ -326,13 +332,17 @@ struct kvm_assigned_dev_kernel { int assigned_dev_id; int host_busnr; int host_devfn; + unsigned int entries_nr; int host_irq; bool host_irq_disabled; + struct msix_entry *host_msix_entries; int guest_irq; + struct kvm_guest_msix_entry *guest_msix_entries; #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) #define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) #define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) #define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) +#define KVM_ASSIGNED_DEV_MSIX ((1 << 2) | (1 << 10)) unsigned long irq_requested_type; int irq_source_id; int flags; -- cgit v1.2.3 From 2350bd1f62c8706c22b8e58c3bfff10806c0a31b Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 25 Feb 2009 17:22:27 +0800 Subject: KVM: Add MSI-X interrupt injection logic We have to handle more than one interrupt with one handler for MSI-X. Avi suggested to use a flag to indicate the pending. So here is it. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 432edc27e82b..3832243625d4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -319,6 +319,7 @@ struct kvm_irq_ack_notifier { void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; +#define KVM_ASSIGNED_MSIX_PENDING 0x1 struct kvm_guest_msix_entry { u32 vector; u16 entry; -- cgit v1.2.3 From d510d6cc653bc4b3094ea73afe12600d0ab445b3 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 25 Feb 2009 17:22:28 +0800 Subject: KVM: Enable MSI-X for KVM assigned device This patch finally enable MSI-X. What we need for MSI-X: 1. Intercept one page in MMIO region of device. So that we can get guest desired MSI-X table and set up the real one. Now this have been done by guest, and transfer to kernel using ioctl KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY. 2. Information for incoming interrupt. Now one device can have more than one interrupt, and they are all handled by one workqueue structure. So we need to identify them. The previous patch enable gsi_msg_pending_bitmap get this done. 3. Mapping from host IRQ to guest gsi as well as guest gsi to real MSI/MSI-X message address/data. We used same entry number for the host and guest here, so that it's easy to find the correlated guest gsi. What we lack for now: 1. The PCI spec said nothing can existed with MSI-X table in the same page of MMIO region, except pending bits. The patch ignore pending bits as the first step (so they are always 0 - no pending). 2. The PCI spec allowed to change MSI-X table dynamically. That means, the OS can enable MSI-X, then mask one MSI-X entry, modify it, and unmask it. The patch didn't support this, and Linux also don't work in this way. 3. The patch didn't implement MSI-X mask all and mask single entry. I would implement the former in driver/pci/msi.c later. And for single entry, userspace should have reposibility to handle it. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 78cdee8c6355..640835ed2708 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -409,6 +409,9 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_DEASSIGNMENT 27 #endif +#ifdef __KVM_HAVE_MSIX +#define KVM_CAP_DEVICE_MSIX 28 +#endif /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 @@ -611,6 +614,11 @@ struct kvm_assigned_irq { #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) +#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\ + KVM_DEV_IRQ_ASSIGN_MASK_MSIX) +#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX (1 << 1) +#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX (1 << 2) + struct kvm_assigned_msix_nr { __u32 assigned_dev_id; __u16 entry_nr; -- cgit v1.2.3 From b95b51d580bff9376850eef29d34c3aa08c26db7 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 26 Feb 2009 13:55:33 +0100 Subject: KVM: declare ioapic functions only on affected hardware Since "KVM: Unify the delivery of IOAPIC and MSI interrupts" I get the following warnings: CC [M] arch/s390/kvm/kvm-s390.o In file included from arch/s390/kvm/kvm-s390.c:22: include/linux/kvm_host.h:357: warning: 'struct kvm_ioapic' declared inside parameter list include/linux/kvm_host.h:357: warning: its scope is only this definition or declaration, which is probably not what you want This patch limits IOAPIC functions for architectures that have one. Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3832243625d4..3b91ec9982c2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -363,9 +363,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); +#ifdef __KVM_HAVE_IOAPIC void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, union kvm_ioapic_redirect_entry *entry, unsigned long *deliver_bitmask); +#endif int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, -- cgit v1.2.3 From a53c17d21c46a752f5ac6695376481bc27865b04 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 5 Mar 2009 16:34:49 +0200 Subject: KVM: ioapic/msi interrupt delivery consolidation ioapic_deliver() and kvm_set_msi() have code duplication. Move the code into ioapic_deliver_entry() function and call it from both places. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3b91ec9982c2..ec9d078b1e8e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -364,7 +364,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); #ifdef __KVM_HAVE_IOAPIC -void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, +void kvm_get_intr_delivery_bitmask(struct kvm *kvm, union kvm_ioapic_redirect_entry *entry, unsigned long *deliver_bitmask); #endif -- cgit v1.2.3 From 343f94fe4d16ec898da77720c03da9e09f8523d2 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 5 Mar 2009 16:34:54 +0200 Subject: KVM: consolidate ioapic/ipi interrupt delivery logic Use kvm_apic_match_dest() in kvm_get_intr_delivery_bitmask() instead of duplicating the same code. Use kvm_get_intr_delivery_bitmask() in apic_send_ipi() to figure out ipi destination instead of reimplementing the logic. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec9d078b1e8e..fb60f31c4fb3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -363,11 +363,6 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); -#ifdef __KVM_HAVE_IOAPIC -void kvm_get_intr_delivery_bitmask(struct kvm *kvm, - union kvm_ioapic_redirect_entry *entry, - unsigned long *deliver_bitmask); -#endif int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, -- cgit v1.2.3 From 58c2dde17d6eb6c8c0566e52d184aa16755d890f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 5 Mar 2009 16:35:04 +0200 Subject: KVM: APIC: get rid of deliver_bitmask Deliver interrupt during destination matching loop. Signed-off-by: Gleb Natapov Acked-by: Xiantao Zhang Signed-off-by: Marcelo Tosatti --- include/linux/kvm_types.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b84aca3c4ad1..fb46efbeabec 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -57,4 +57,14 @@ union kvm_ioapic_redirect_entry { } fields; }; +struct kvm_lapic_irq { + u32 vector; + u32 delivery_mode; + u32 dest_mode; + u32 level; + u32 trig_mode; + u32 shorthand; + u32 dest_id; +}; + #endif /* __KVM_TYPES_H__ */ -- cgit v1.2.3 From e56d532f20c890a06bbe7cd479f4201e3a03cd73 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 12 Mar 2009 21:45:39 +0800 Subject: KVM: Device assignment framework rework After discussion with Marcelo, we decided to rework device assignment framework together. The old problems are kernel logic is unnecessary complex. So Marcelo suggest to split it into a more elegant way: 1. Split host IRQ assign and guest IRQ assign. And userspace determine the combination. Also discard msi2intx parameter, userspace can specific KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_INTX in assigned_irq->flags to enable MSI to INTx convertion. 2. Split assign IRQ and deassign IRQ. Import two new ioctls: KVM_ASSIGN_DEV_IRQ and KVM_DEASSIGN_DEV_IRQ. This patch also fixed the reversed _IOR vs _IOW in definition(by deprecated the old interface). [avi: replace homemade bitcount() by hweight_long()] Signed-off-by: Marcelo Tosatti Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 26 +++++++++++++++++--------- include/linux/kvm_host.h | 5 ----- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 640835ed2708..644e3a9f47db 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -412,6 +412,7 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_MSIX #define KVM_CAP_DEVICE_MSIX 28 #endif +#define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 @@ -485,8 +486,10 @@ struct kvm_irq_routing { #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ struct kvm_assigned_pci_dev) #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) +/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) +#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ struct kvm_assigned_pci_dev) @@ -494,6 +497,7 @@ struct kvm_irq_routing { _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) #define KVM_ASSIGN_SET_MSIX_ENTRY \ _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) +#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) /* * ioctls for vcpu fds @@ -584,6 +588,8 @@ struct kvm_debug_guest { #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + struct kvm_assigned_pci_dev { __u32 assigned_dev_id; __u32 busnr; @@ -594,6 +600,17 @@ struct kvm_assigned_pci_dev { }; }; +#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) + +#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) + +#define KVM_DEV_IRQ_HOST_MASK 0x00ff +#define KVM_DEV_IRQ_GUEST_MASK 0xff00 + struct kvm_assigned_irq { __u32 assigned_dev_id; __u32 host_irq; @@ -609,15 +626,6 @@ struct kvm_assigned_irq { }; }; -#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) - -#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI -#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) - -#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\ - KVM_DEV_IRQ_ASSIGN_MASK_MSIX) -#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX (1 << 1) -#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX (1 << 2) struct kvm_assigned_msix_nr { __u32 assigned_dev_id; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fb60f31c4fb3..40e49ede8f91 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -339,11 +339,6 @@ struct kvm_assigned_dev_kernel { struct msix_entry *host_msix_entries; int guest_irq; struct kvm_guest_msix_entry *guest_msix_entries; -#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) -#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) -#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) -#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) -#define KVM_ASSIGNED_DEV_MSIX ((1 << 2) | (1 << 10)) unsigned long irq_requested_type; int irq_source_id; int flags; -- cgit v1.2.3 From 78646121e9a2fcf7977cc15966420e572a450bc3 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 23 Mar 2009 12:12:11 +0200 Subject: KVM: Fix interrupt unhalting a vcpu when it shouldn't kvm_vcpu_block() unhalts vpu on an interrupt/timer without checking if interrupt window is actually opened. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 40e49ede8f91..72d56844f388 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -298,6 +298,7 @@ int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); void kvm_free_physmem(struct kvm *kvm); -- cgit v1.2.3 From 2f8b9ee14eb439008e0c5131116ea6baa40dba50 Mon Sep 17 00:00:00 2001 From: nathan binkert Date: Fri, 27 Mar 2009 21:53:05 -0700 Subject: KVM: Make kvm header C++ friendly Two things needed fixing: 1) g++ does not allow a named structure type within an anonymous union and 2) Avoid name clash between two padding fields within the same struct by giving them different names as is done elsewhere in the header. Signed-off-by: Nathan Binkert Signed-off-by: Avi Kivity --- include/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 644e3a9f47db..3db5d8d37485 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -119,7 +119,7 @@ struct kvm_run { __u32 error_code; } ex; /* KVM_EXIT_IO */ - struct kvm_io { + struct { #define KVM_EXIT_IO_IN 0 #define KVM_EXIT_IO_OUT 1 __u8 direction; @@ -224,10 +224,10 @@ struct kvm_interrupt { /* for KVM_GET_DIRTY_LOG */ struct kvm_dirty_log { __u32 slot; - __u32 padding; + __u32 padding1; union { void __user *dirty_bitmap; /* one bit per page */ - __u64 padding; + __u64 padding2; }; }; -- cgit v1.2.3 From 522c68c4416de3cd3e11a9ff10d58e776a69ae1e Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 27 Apr 2009 20:35:43 +0800 Subject: KVM: Enable snooping control for supported hardware Memory aliases with different memory type is a problem for guest. For the guest without assigned device, the memory type of guest memory would always been the same as host(WB); but for the assigned device, some part of memory may be used as DMA and then set to uncacheable memory type(UC/WC), which would be a conflict of host memory type then be a potential issue. Snooping control can guarantee the cache correctness of memory go through the DMA engine of VT-d. [avi: fix build on ia64] Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 72d56844f388..bdce8e1303c9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -367,6 +367,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); +/* For vcpu->arch.iommu_flags */ +#define KVM_IOMMU_CACHE_COHERENCY 0x1 + #ifdef CONFIG_IOMMU_API int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); -- cgit v1.2.3 From 32f8840064d88cc3f6e85203aec7b6b57bebcb97 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 7 May 2009 17:55:12 -0300 Subject: KVM: use smp_send_reschedule in kvm_vcpu_kick KVM uses a function call IPI to cause the exit of a guest running on a physical cpu. For virtual interrupt notification there is no need to wait on IPI receival, or to execute any function. This is exactly what the reschedule IPI does, without the overhead of function IPI. So use it instead of smp_call_function_single in kvm_vcpu_kick. Also change the "guest_mode" variable to a bit in vcpu->requests, and use that to collapse multiple IPI's that would be issued between the first one and zeroing of guest mode. This allows kvm_vcpu_kick to called with interrupts disabled. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bdce8e1303c9..161816284192 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -38,6 +38,7 @@ #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 #define KVM_REQ_KVMCLOCK_UPDATE 8 +#define KVM_REQ_KICK 9 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 @@ -72,7 +73,6 @@ struct kvm_vcpu { struct mutex mutex; int cpu; struct kvm_run *run; - int guest_mode; unsigned long requests; unsigned long guest_debug; int fpu_active; -- cgit v1.2.3 From 547de29e5b1662deb05b5f90917902dc0e9ac182 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 7 May 2009 17:55:13 -0300 Subject: KVM: protect assigned dev workqueue, int handler and irq acker kvm_assigned_dev_ack_irq is vulnerable to a race condition with the interrupt handler function. It does: if (dev->host_irq_disabled) { enable_irq(dev->host_irq); dev->host_irq_disabled = false; } If an interrupt triggers before the host->dev_irq_disabled assignment, it will disable the interrupt and set dev->host_irq_disabled to true. On return to kvm_assigned_dev_ack_irq, dev->host_irq_disabled is set to false, and the next kvm_assigned_dev_ack_irq call will fail to reenable it. Other than that, having the interrupt handler and work handlers run in parallel sounds like asking for trouble (could not spot any obvious problem, but better not have to, its fragile). CC: sheng.yang@intel.com Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 161816284192..aacc5449f586 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -345,6 +345,7 @@ struct kvm_assigned_dev_kernel { int flags; struct pci_dev *dev; struct kvm *kvm; + spinlock_t assigned_dev_lock; }; struct kvm_irq_mask_notifier { -- cgit v1.2.3 From 04dce7d9d429ea5ea04e9432d1726c930f4d67da Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 10 Jun 2009 16:59:46 +1000 Subject: spinlock: Add missing __raw_spin_lock_flags() stub for UP This was only defined with CONFIG_DEBUG_SPINLOCK set, but some obscure arch/powerpc code wants it always. Signed-off-by: Benjamin Herrenschmidt Acked-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/spinlock_up.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 938234c4a996..d4841ed8215b 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -60,6 +60,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) #define __raw_spin_is_locked(lock) ((void)(lock), 0) /* for sched.c and kernel_lock.c: */ # define __raw_spin_lock(lock) do { (void)(lock); } while (0) +# define __raw_spin_lock_flags(lock, flags) do { (void)(lock); } while (0) # define __raw_spin_unlock(lock) do { (void)(lock); } while (0) # define __raw_spin_trylock(lock) ({ (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ -- cgit v1.2.3 From f1db457ce6e2f63cb01022f58c0c023838958bd1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 10 Jun 2009 10:06:24 +0800 Subject: tracing/events: convert block trace points to TRACE_EVENT(), fix !CONFIG_BLOCK Fix building failures when CONFIG_BLOCK == n. Signed-off-by: Li Zefan LKML-Reference: <4A2F1520.8020003@cn.fujitsu.com> Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/blktrace_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index c7ec31dd04c9..7e4350ece0f8 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -218,7 +218,7 @@ static inline int blk_trace_init_sysfs(struct device *dev) #endif /* CONFIG_BLK_DEV_IO_TRACE */ -#ifdef CONFIG_EVENT_TRACING +#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK) static inline int blk_cmd_buf_len(struct request *rq) { @@ -229,7 +229,7 @@ extern void blk_dump_cmd(char *buf, struct request *rq); extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq); -#endif /* CONFIG_EVENT_TRACING */ +#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From feb6118d4e76bc5685909426c2dcc90fdb7ba05b Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:00:39 +0300 Subject: [SCSI] libosd: OSD2r05: Additional command enums Add all constant definitions of new OSD commands added in revision 4 & 5. Mainly for creating snapshots and clones. Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- include/scsi/osd_protocol.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h index 62b2ab8c69d4..2cc8e8b1cc19 100644 --- a/include/scsi/osd_protocol.h +++ b/include/scsi/osd_protocol.h @@ -303,7 +303,15 @@ enum osd_service_actions { OSD_ACT_V2(REMOVE_MEMBER_OBJECTS, 0x21) OSD_ACT_V2(GET_MEMBER_ATTRIBUTES, 0x22) OSD_ACT_V2(SET_MEMBER_ATTRIBUTES, 0x23) + + OSD_ACT_V2(CREATE_CLONE, 0x28) + OSD_ACT_V2(CREATE_SNAPSHOT, 0x29) + OSD_ACT_V2(DETACH_CLONE, 0x2A) + OSD_ACT_V2(REFRESH_SNAPSHOT_CLONE, 0x2B) + OSD_ACT_V2(RESTORE_PARTITION_FROM_SNAPSHOT, 0x2C) + OSD_ACT_V2(READ_MAP, 0x31) + OSD_ACT_V2(READ_MAPS_COMPARE, 0x32) OSD_ACT_V1_V2(PERFORM_SCSI_COMMAND, 0x8F7E, 0x8F7C) OSD_ACT_V1_V2(SCSI_TASK_MANAGEMENT, 0x8F7F, 0x8F7D) -- cgit v1.2.3 From 29191b92030bc97b05b539ce5ae0543c24a6d7ca Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:01:03 +0300 Subject: [SCSI] libosd: OSD2r05: Attribute definitions Some New revision 5 Attribute definitions. Some missing definitions of Attributes and pages Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- include/scsi/osd_attributes.h | 74 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/osd_attributes.h b/include/scsi/osd_attributes.h index f888a6fda073..56e920ade326 100644 --- a/include/scsi/osd_attributes.h +++ b/include/scsi/osd_attributes.h @@ -29,6 +29,7 @@ enum { OSD_APAGE_PARTITION_INFORMATION = OSD_APAGE_PARTITION_FIRST + 1, OSD_APAGE_PARTITION_QUOTAS = OSD_APAGE_PARTITION_FIRST + 2, OSD_APAGE_PARTITION_TIMESTAMP = OSD_APAGE_PARTITION_FIRST + 3, + OSD_APAGE_PARTITION_ATTR_ACCESS = OSD_APAGE_PARTITION_FIRST + 4, OSD_APAGE_PARTITION_SECURITY = OSD_APAGE_PARTITION_FIRST + 5, OSD_APAGE_PARTITION_LAST = 0x5FFFFFFF, @@ -51,7 +52,9 @@ enum { OSD_APAGE_RESERVED_TYPE_LAST = 0xEFFFFFFF, OSD_APAGE_COMMON_FIRST = 0xF0000000, - OSD_APAGE_COMMON_LAST = 0xFFFFFFFE, + OSD_APAGE_COMMON_LAST = 0xFFFFFFFD, + + OSD_APAGE_CURRENT_COMMAND = 0xFFFFFFFE, OSD_APAGE_REQUEST_ALL = 0xFFFFFFFF, }; @@ -106,10 +109,30 @@ enum { OSD_ATTR_RI_PRODUCT_REVISION_LEVEL = 0x7, /* 4 */ OSD_ATTR_RI_PRODUCT_SERIAL_NUMBER = 0x8, /* variable */ OSD_ATTR_RI_OSD_NAME = 0x9, /* variable */ + OSD_ATTR_RI_MAX_CDB_CONTINUATION_LEN = 0xA, /* 4 */ OSD_ATTR_RI_TOTAL_CAPACITY = 0x80, /* 8 */ OSD_ATTR_RI_USED_CAPACITY = 0x81, /* 8 */ OSD_ATTR_RI_NUMBER_OF_PARTITIONS = 0xC0, /* 8 */ OSD_ATTR_RI_CLOCK = 0x100, /* 6 */ + OARI_DEFAULT_ISOLATION_METHOD = 0X110, /* 1 */ + OARI_SUPPORTED_ISOLATION_METHODS = 0X111, /* 32 */ + + OARI_DATA_ATOMICITY_GUARANTEE = 0X120, /* 8 */ + OARI_DATA_ATOMICITY_ALIGNMENT = 0X121, /* 8 */ + OARI_ATTRIBUTES_ATOMICITY_GUARANTEE = 0X122, /* 8 */ + OARI_DATA_ATTRIBUTES_ATOMICITY_MULTIPLIER = 0X123, /* 1 */ + + OARI_MAXIMUM_SNAPSHOTS_COUNT = 0X1C1, /* 0 or 4 */ + OARI_MAXIMUM_CLONES_COUNT = 0X1C2, /* 0 or 4 */ + OARI_MAXIMUM_BRANCH_DEPTH = 0X1CC, /* 0 or 4 */ + OARI_SUPPORTED_OBJECT_DUPLICATION_METHOD_FIRST = 0X200, /* 0 or 4 */ + OARI_SUPPORTED_OBJECT_DUPLICATION_METHOD_LAST = 0X2ff, /* 0 or 4 */ + OARI_SUPPORTED_TIME_OF_DUPLICATION_METHOD_FIRST = 0X300, /* 0 or 4 */ + OARI_SUPPORTED_TIME_OF_DUPLICATION_METHOD_LAST = 0X30F, /* 0 or 4 */ + OARI_SUPPORT_FOR_DUPLICATED_OBJECT_FREEZING = 0X310, /* 0 or 4 */ + OARI_SUPPORT_FOR_SNAPSHOT_REFRESHING = 0X311, /* 0 or 1 */ + OARI_SUPPORTED_CDB_CONTINUATION_DESC_TYPE_FIRST = 0X7000001,/* 0 or 4 */ + OARI_SUPPORTED_CDB_CONTINUATION_DESC_TYPE_LAST = 0X700FFFF,/* 0 or 4 */ }; /* Root_Information_attributes_page does not have a get_page structure */ @@ -120,7 +143,15 @@ enum { OSD_ATTR_PI_PARTITION_ID = 0x1, /* 8 */ OSD_ATTR_PI_USERNAME = 0x9, /* variable */ OSD_ATTR_PI_USED_CAPACITY = 0x81, /* 8 */ + OSD_ATTR_PI_USED_CAPACITY_INCREMENT = 0x84, /* 0 or 8 */ OSD_ATTR_PI_NUMBER_OF_OBJECTS = 0xC1, /* 8 */ + + OSD_ATTR_PI_ACTUAL_DATA_SPACE = 0xD1, /* 0 or 8 */ + OSD_ATTR_PI_RESERVED_DATA_SPACE = 0xD2, /* 0 or 8 */ + OSD_ATTR_PI_DEFAULT_SNAPSHOT_DUPLICATION_METHOD = 0x200,/* 0 or 4 */ + OSD_ATTR_PI_DEFAULT_CLONE_DUPLICATION_METHOD = 0x201,/* 0 or 4 */ + OSD_ATTR_PI_DEFAULT_SP_TIME_OF_DUPLICATION = 0x300,/* 0 or 4 */ + OSD_ATTR_PI_DEFAULT_CLONE_TIME_OF_DUPLICATION = 0x301,/* 0 or 4 */ }; /* Partition Information attributes page does not have a get_page structure */ @@ -131,6 +162,7 @@ enum { OSD_ATTR_CI_PARTITION_ID = 0x1, /* 8 */ OSD_ATTR_CI_COLLECTION_OBJECT_ID = 0x2, /* 8 */ OSD_ATTR_CI_USERNAME = 0x9, /* variable */ + OSD_ATTR_CI_COLLECTION_TYPE = 0xA, /* 1 */ OSD_ATTR_CI_USED_CAPACITY = 0x81, /* 8 */ }; /* Collection Information attributes page does not have a get_page structure */ @@ -144,6 +176,8 @@ enum { OSD_ATTR_OI_USERNAME = 0x9, /* variable */ OSD_ATTR_OI_USED_CAPACITY = 0x81, /* 8 */ OSD_ATTR_OI_LOGICAL_LENGTH = 0x82, /* 8 */ + SD_ATTR_OI_ACTUAL_DATA_SPACE = 0XD1, /* 0 OR 8 */ + SD_ATTR_OI_RESERVED_DATA_SPACE = 0XD2, /* 0 OR 8 */ }; /* Object Information attributes page does not have a get_page structure */ @@ -248,7 +282,18 @@ struct object_timestamps_attributes_page { struct osd_timestamp data_modified_time; } __packed; -/* 7.1.2.19 Collections attributes page */ +/* OSD2r05: 7.1.3.19 Attributes Access attributes page + * (OSD_APAGE_PARTITION_ATTR_ACCESS) + * + * each attribute is of the form below. Total array length is deduced + * from the attribute's length + * (See allowed_attributes_access of the struct osd_cap_object_descriptor) + */ +struct attributes_access_attr { + struct osd_attributes_list_attrid attr_list[0]; +} __packed; + +/* OSD2r05: 7.1.2.21 Collections attributes page */ /* TBD */ /* 7.1.2.20 Root Policy/Security attributes page (OSD_APAGE_ROOT_SECURITY) */ @@ -324,4 +369,29 @@ struct object_security_attributes_page { __be32 policy_access_tag; } __packed; +/* OSD2r05: 7.1.3.31 Current Command attributes page + * (OSD_APAGE_CURRENT_COMMAND) + */ +enum { + OSD_ATTR_CC_RESPONSE_INTEGRITY_CHECK_VALUE = 0x1, /* 32 */ + OSD_ATTR_CC_OBJECT_TYPE = 0x2, /* 1 */ + OSD_ATTR_CC_PARTITION_ID = 0x3, /* 8 */ + OSD_ATTR_CC_OBJECT_ID = 0x4, /* 8 */ + OSD_ATTR_CC_STARTING_BYTE_ADDRESS_OF_APPEND = 0x5, /* 8 */ + OSD_ATTR_CC_CHANGE_IN_USED_CAPACITY = 0x6, /* 8 */ +}; + +/*TBD: osdv1_current_command_attributes_page */ + +struct osdv2_current_command_attributes_page { + struct osd_attr_page_header hdr; /* id=0xFFFFFFFE, size=0x44 */ + u8 response_integrity_check_value[OSD_CRYPTO_KEYID_SIZE]; + u8 object_type; + u8 reserved[3]; + __be64 partition_id; + __be64 object_id; + __be64 starting_byte_address_of_append; + __be64 change_in_used_capacity; +}; + #endif /*ndef __OSD_ATTRIBUTES_H__*/ -- cgit v1.2.3 From 0e35afbc8b054e04a35faa796c72abb3b82bd33b Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:02:22 +0300 Subject: [SCSI] libosd: osd_req_{read,write}_kern new API By popular demand, define usefull wrappers for osd_req_read/write that recieve kernel pointers. All users had their own. Also remove these from exofs Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- include/scsi/osd_initiator.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index b24d9616eb46..6132790d678f 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -364,6 +364,8 @@ void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *); void osd_req_write(struct osd_request *or, const struct osd_obj_id *, struct bio *data_out, u64 offset); +int osd_req_write_kern(struct osd_request *or, + const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); void osd_req_append(struct osd_request *or, const struct osd_obj_id *, struct bio *data_out);/* NI */ void osd_req_create_write(struct osd_request *or, @@ -379,6 +381,8 @@ void osd_req_flush_object(struct osd_request *or, void osd_req_read(struct osd_request *or, const struct osd_obj_id *, struct bio *data_in, u64 offset); +int osd_req_read_kern(struct osd_request *or, + const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); /* * Root/Partition/Collection/Object Attributes commands -- cgit v1.2.3 From 62f469b596dd0aadf046a69027087c18db43734e Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:04:26 +0300 Subject: [SCSI] libosd: osd_req_{read,write} takes a length parameter For supporting of chained-bios we can not inspect the first bio only, as before. Caller shall pass the total length of the request, ie. sum_bytes(bio-chain). Also since the bio might be a chain we don't set it's direction on behalf of it's callers. The bio direction should be properly set prior to this call. So fix a couple of write users that now need to set the bio direction properly [In this patch I change both library code and user sites at exofs, to make it easy on integration. It should be submitted via James's scsi-misc tree.] Signed-off-by: Boaz Harrosh CC: Jeff Garzik Signed-off-by: James Bottomley --- include/scsi/osd_initiator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index 6132790d678f..8c1e3b804afd 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -363,7 +363,7 @@ void osd_req_create_object(struct osd_request *or, struct osd_obj_id *); void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *); void osd_req_write(struct osd_request *or, - const struct osd_obj_id *, struct bio *data_out, u64 offset); + const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len); int osd_req_write_kern(struct osd_request *or, const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); void osd_req_append(struct osd_request *or, @@ -380,7 +380,7 @@ void osd_req_flush_object(struct osd_request *or, /*V2*/ u64 offset, /*V2*/ u64 len); void osd_req_read(struct osd_request *or, - const struct osd_obj_id *, struct bio *data_in, u64 offset); + const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len); int osd_req_read_kern(struct osd_request *or, const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); -- cgit v1.2.3 From fc2fac5b5f11e2bee3bf37215c8746236f5ea188 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:04:43 +0300 Subject: [SCSI] libosd: Define an osd_dev wrapper to retrieve the request_queue libosd users that need to work with bios, must sometime use the request_queue associated with the osd_dev. Make a wrapper for that, and convert all in-tree users. Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- include/scsi/osd_initiator.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index 8c1e3b804afd..b44dc53bd881 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -18,6 +18,7 @@ #include "osd_types.h" #include +#include /* Note: "NI" in comments below means "Not Implemented yet" */ @@ -69,6 +70,10 @@ void osd_dev_fini(struct osd_dev *od); /* some hi level device operations */ int osd_auto_detect_ver(struct osd_dev *od, void *caps); /* GFP_KERNEL */ +static inline struct request_queue *osd_request_queue(struct osd_dev *od) +{ + return od->scsi_device->request_queue; +} /* we might want to use function vector in the future */ static inline void osd_dev_set_ver(struct osd_dev *od, enum osd_std_version v) -- cgit v1.2.3 From 021e2230d6c04d80289fceb2d21c9ce93a615b32 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:05:05 +0300 Subject: [SCSI] osduld: use filp_open() when looking up an osd-device This patch was inspired by Al Viro, for simplifying and fixing the retrieval of osd-devices by in-kernel users, eg: file systems. In-Kernel users, now, go through the same path user-mode does by opening a file on the osd char-device and though holding a reference to both the device and the Module. A file pointer was added to the osd_dev structure which is now allocated for each user. The internal osd_dev is no longer exposed outside of the uld. I wanted to do that for a long time so each libosd user can have his own defaults on the device. The API is left the same, so user code need not change. It is no longer needed to open/close a file handle on the osd char-device from user-mode, before mounting an exofs on it. Signed-off-by: Boaz Harrosh CC: Al Viro Signed-off-by: James Bottomley --- include/scsi/osd_initiator.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index b44dc53bd881..02bd9f716357 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -48,6 +48,7 @@ enum osd_std_version { */ struct osd_dev { struct scsi_device *scsi_device; + struct file *file; unsigned def_timeout; #ifdef OSD_VER1_SUPPORT -- cgit v1.2.3 From bd2b5b12849a3446abad0b25e920f86f5480b309 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2009 13:40:57 +0200 Subject: perf_counter: More aggressive frequency adjustment Also employ the overflow handler to adjust the frequency, this results in a stable frequency in about 40~50 samples, instead of that many ticks. This also means we can start sampling at a sample period of 1 without running head-first into the throttle. It relies on sched_clock() to accurately measure the time difference between the overflow NMIs. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 3586df840f69..282d8cc48980 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -371,6 +371,7 @@ struct hw_perf_counter { u64 freq_count; u64 freq_interrupts; + u64 freq_stamp; #endif }; -- cgit v1.2.3 From 6ff9a64d2aaa6eae396adc95e9c91c0cbfa6dbe4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Jun 2009 14:28:34 -0400 Subject: tracing: do not translate event helper macros in print format By moving the macro that creates the print format code above the defining of the event macro helpers (__get_str, __print_symbolic, and __get_dynamic_array), we get a little cleaner print format. Instead of: (char *)((void *)REC + REC->__data_loc_name) we get: __get_str(name) Instead of: ({ static const struct trace_print_flags symbols[] = { { HI_SOFTIRQ, "HI" }, { we get: __print_symbolic(REC->vec, { HI_SOFTIRQ, "HI" }, { Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 158 +++++++++++++++++++++++++------------------------ 1 file changed, 81 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 40ede4db4d88..1867553c61e5 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -80,6 +80,87 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " offset:%u; size:%u;\n", + * offsetof(struct ftrace_raw_##call, item), + * sizeof(field.type)); + * + * } + */ + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), \ + __data_loc_##item), \ + (unsigned int)sizeof(field.__data_loc_##item)); \ + if (!ret) \ + return 0; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __entry +#define __entry REC + +#undef __print_symbolic +#undef __get_dynamic_array +#undef __get_str + +#undef TP_printk +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field __attribute__((unused)); \ + int ret = 0; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + /* * Stage 3 of the trace events. * @@ -179,83 +260,6 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -/* - * Setup the showing format of trace point. - * - * int - * ftrace_format_##call(struct trace_seq *s) - * { - * struct ftrace_raw_##call field; - * int ret; - * - * ret = trace_seq_printf(s, #type " " #item ";" - * " offset:%u; size:%u;\n", - * offsetof(struct ftrace_raw_##call, item), - * sizeof(field.type)); - * - * } - */ - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args - -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __dynamic_array -#define __dynamic_array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), \ - __data_loc_##item), \ - (unsigned int)sizeof(field.__data_loc_##item)); \ - if (!ret) \ - return 0; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __entry -#define __entry REC - -#undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) - -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ -static int \ -ftrace_format_##call(struct trace_seq *s) \ -{ \ - struct ftrace_raw_##call field __attribute__((unused)); \ - int ret = 0; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) - #undef __field #define __field(type, item) \ ret = trace_define_field(event_call, #type, #item, \ -- cgit v1.2.3 From d9c7d394a8ebacb60097b192939ae9f15235225e Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Wed, 10 Jun 2009 12:57:06 -0700 Subject: block: prevent possible io_context->refcount overflow Currently io_context has an atomic_t(32-bit) as refcount. In the case of cfq, for each device against whcih a task does I/O, a reference to the io_context would be taken. And when there are multiple process sharing io_contexts(CLONE_IO) would also have a reference to the same io_context. Theoretically the possible maximum number of processes sharing the same io_context + the number of disks/cfq_data referring to the same io_context can overflow the 32-bit counter on a very high-end machine. Even though it is an improbable case, let us make it atomic_long_t. Signed-off-by: Nikanth Karthikesan Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 08b987bccf89..dd05434fa45f 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -64,7 +64,7 @@ struct cfq_io_context { * and kmalloc'ed. These could be shared between processes. */ struct io_context { - atomic_t refcount; + atomic_long_t refcount; atomic_t nr_tasks; /* all the fields below are protected by this lock */ @@ -91,8 +91,8 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) * if ref count is zero, don't allow sharing (ioc is going away, it's * a race). */ - if (ioc && atomic_inc_not_zero(&ioc->refcount)) { - atomic_inc(&ioc->nr_tasks); + if (ioc && atomic_long_inc_not_zero(&ioc->refcount)) { + atomic_long_inc(&ioc->refcount); return ioc; } -- cgit v1.2.3 From b43d65f7e818485664037a46367cfb15af05bd8c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 9 Jun 2009 08:11:42 +0100 Subject: [ARM] 5546/1: ARM PL022 SSP/SPI driver v3 This adds a driver for the ARM PL022 PrimeCell SSP/SPI driver found in the U300 platforms as well as in some ARM reference hardware, and in a modified version on the Nomadik board. Reviewed-by: Alessandro Rubini Reviewed-by: Russell King Reviewed-by: Baruch Siach Signed-off-by: Linus Walleij Signed-off-by: Russell King --- include/linux/amba/pl022.h | 264 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 include/linux/amba/pl022.h (limited to 'include') diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h new file mode 100644 index 000000000000..dcad0ffd1755 --- /dev/null +++ b/include/linux/amba/pl022.h @@ -0,0 +1,264 @@ +/* + * include/linux/amba/pl022.h + * + * Copyright (C) 2008-2009 ST-Ericsson AB + * Copyright (C) 2006 STMicroelectronics Pvt. Ltd. + * + * Author: Linus Walleij + * + * Initial version inspired by: + * linux-2.6.17-rc3-mm1/drivers/spi/pxa2xx_spi.c + * Initial adoption to PL022 by: + * Sachin Verma + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _SSP_PL022_H +#define _SSP_PL022_H + +#include + +/** + * whether SSP is in loopback mode or not + */ +enum ssp_loopback { + LOOPBACK_DISABLED, + LOOPBACK_ENABLED +}; + +/** + * enum ssp_interface - interfaces allowed for this SSP Controller + * @SSP_INTERFACE_MOTOROLA_SPI: Motorola Interface + * @SSP_INTERFACE_TI_SYNC_SERIAL: Texas Instrument Synchronous Serial + * interface + * @SSP_INTERFACE_NATIONAL_MICROWIRE: National Semiconductor Microwire + * interface + * @SSP_INTERFACE_UNIDIRECTIONAL: Unidirectional interface (STn8810 + * &STn8815 only) + */ +enum ssp_interface { + SSP_INTERFACE_MOTOROLA_SPI, + SSP_INTERFACE_TI_SYNC_SERIAL, + SSP_INTERFACE_NATIONAL_MICROWIRE, + SSP_INTERFACE_UNIDIRECTIONAL +}; + +/** + * enum ssp_hierarchy - whether SSP is configured as Master or Slave + */ +enum ssp_hierarchy { + SSP_MASTER, + SSP_SLAVE +}; + +/** + * enum ssp_clock_params - clock parameters, to set SSP clock at a + * desired freq + */ +struct ssp_clock_params { + u8 cpsdvsr; /* value from 2 to 254 (even only!) */ + u8 scr; /* value from 0 to 255 */ +}; + +/** + * enum ssp_rx_endian - endianess of Rx FIFO Data + */ +enum ssp_rx_endian { + SSP_RX_MSB, + SSP_RX_LSB +}; + +/** + * enum ssp_tx_endian - endianess of Tx FIFO Data + */ +enum ssp_tx_endian { + SSP_TX_MSB, + SSP_TX_LSB +}; + +/** + * enum ssp_data_size - number of bits in one data element + */ +enum ssp_data_size { + SSP_DATA_BITS_4 = 0x03, SSP_DATA_BITS_5, SSP_DATA_BITS_6, + SSP_DATA_BITS_7, SSP_DATA_BITS_8, SSP_DATA_BITS_9, + SSP_DATA_BITS_10, SSP_DATA_BITS_11, SSP_DATA_BITS_12, + SSP_DATA_BITS_13, SSP_DATA_BITS_14, SSP_DATA_BITS_15, + SSP_DATA_BITS_16, SSP_DATA_BITS_17, SSP_DATA_BITS_18, + SSP_DATA_BITS_19, SSP_DATA_BITS_20, SSP_DATA_BITS_21, + SSP_DATA_BITS_22, SSP_DATA_BITS_23, SSP_DATA_BITS_24, + SSP_DATA_BITS_25, SSP_DATA_BITS_26, SSP_DATA_BITS_27, + SSP_DATA_BITS_28, SSP_DATA_BITS_29, SSP_DATA_BITS_30, + SSP_DATA_BITS_31, SSP_DATA_BITS_32 +}; + +/** + * enum ssp_mode - SSP mode of operation (Communication modes) + */ +enum ssp_mode { + INTERRUPT_TRANSFER, + POLLING_TRANSFER, + DMA_TRANSFER +}; + +/** + * enum ssp_rx_level_trig - receive FIFO watermark level which triggers + * IT: Interrupt fires when _N_ or more elements in RX FIFO. + */ +enum ssp_rx_level_trig { + SSP_RX_1_OR_MORE_ELEM, + SSP_RX_4_OR_MORE_ELEM, + SSP_RX_8_OR_MORE_ELEM, + SSP_RX_16_OR_MORE_ELEM, + SSP_RX_32_OR_MORE_ELEM +}; + +/** + * Transmit FIFO watermark level which triggers (IT Interrupt fires + * when _N_ or more empty locations in TX FIFO) + */ +enum ssp_tx_level_trig { + SSP_TX_1_OR_MORE_EMPTY_LOC, + SSP_TX_4_OR_MORE_EMPTY_LOC, + SSP_TX_8_OR_MORE_EMPTY_LOC, + SSP_TX_16_OR_MORE_EMPTY_LOC, + SSP_TX_32_OR_MORE_EMPTY_LOC +}; + +/** + * enum SPI Clock Phase - clock phase (Motorola SPI interface only) + * @SSP_CLK_RISING_EDGE: Receive data on rising edge + * @SSP_CLK_FALLING_EDGE: Receive data on falling edge + */ +enum ssp_spi_clk_phase { + SSP_CLK_RISING_EDGE, + SSP_CLK_FALLING_EDGE +}; + +/** + * enum SPI Clock Polarity - clock polarity (Motorola SPI interface only) + * @SSP_CLK_POL_IDLE_LOW: Low inactive level + * @SSP_CLK_POL_IDLE_HIGH: High inactive level + */ +enum ssp_spi_clk_pol { + SSP_CLK_POL_IDLE_LOW, + SSP_CLK_POL_IDLE_HIGH +}; + +/** + * Microwire Conrol Lengths Command size in microwire format + */ +enum ssp_microwire_ctrl_len { + SSP_BITS_4 = 0x03, SSP_BITS_5, SSP_BITS_6, + SSP_BITS_7, SSP_BITS_8, SSP_BITS_9, + SSP_BITS_10, SSP_BITS_11, SSP_BITS_12, + SSP_BITS_13, SSP_BITS_14, SSP_BITS_15, + SSP_BITS_16, SSP_BITS_17, SSP_BITS_18, + SSP_BITS_19, SSP_BITS_20, SSP_BITS_21, + SSP_BITS_22, SSP_BITS_23, SSP_BITS_24, + SSP_BITS_25, SSP_BITS_26, SSP_BITS_27, + SSP_BITS_28, SSP_BITS_29, SSP_BITS_30, + SSP_BITS_31, SSP_BITS_32 +}; + +/** + * enum Microwire Wait State + * @SSP_MWIRE_WAIT_ZERO: No wait state inserted after last command bit + * @SSP_MWIRE_WAIT_ONE: One wait state inserted after last command bit + */ +enum ssp_microwire_wait_state { + SSP_MWIRE_WAIT_ZERO, + SSP_MWIRE_WAIT_ONE +}; + +/** + * enum Microwire - whether Full/Half Duplex + * @SSP_MICROWIRE_CHANNEL_FULL_DUPLEX: SSPTXD becomes bi-directional, + * SSPRXD not used + * @SSP_MICROWIRE_CHANNEL_HALF_DUPLEX: SSPTXD is an output, SSPRXD is + * an input. + */ +enum ssp_duplex { + SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, + SSP_MICROWIRE_CHANNEL_HALF_DUPLEX +}; + +/** + * CHIP select/deselect commands + */ +enum ssp_chip_select { + SSP_CHIP_SELECT, + SSP_CHIP_DESELECT +}; + + +/** + * struct pl022_ssp_master - device.platform_data for SPI controller devices. + * @num_chipselect: chipselects are used to distinguish individual + * SPI slaves, and are numbered from zero to num_chipselects - 1. + * each slave has a chipselect signal, but it's common that not + * every chipselect is connected to a slave. + * @enable_dma: if true enables DMA driven transfers. + */ +struct pl022_ssp_controller { + u16 bus_id; + u8 num_chipselect; + u8 enable_dma:1; +}; + +/** + * struct ssp_config_chip - spi_board_info.controller_data for SPI + * slave devices, copied to spi_device.controller_data. + * + * @lbm: used for test purpose to internally connect RX and TX + * @iface: Interface type(Motorola, TI, Microwire, Universal) + * @hierarchy: sets whether interface is master or slave + * @slave_tx_disable: SSPTXD is disconnected (in slave mode only) + * @clk_freq: Tune freq parameters of SSP(when in master mode) + * @endian_rx: Endianess of Data in Rx FIFO + * @endian_tx: Endianess of Data in Tx FIFO + * @data_size: Width of data element(4 to 32 bits) + * @com_mode: communication mode: polling, Interrupt or DMA + * @rx_lev_trig: Rx FIFO watermark level (for IT & DMA mode) + * @tx_lev_trig: Tx FIFO watermark level (for IT & DMA mode) + * @clk_phase: Motorola SPI interface Clock phase + * @clk_pol: Motorola SPI interface Clock polarity + * @ctrl_len: Microwire interface: Control length + * @wait_state: Microwire interface: Wait state + * @duplex: Microwire interface: Full/Half duplex + * @cs_control: function pointer to board-specific function to + * assert/deassert I/O port to control HW generation of devices chip-select. + * @dma_xfer_type: Type of DMA xfer (Mem-to-periph or Periph-to-Periph) + * @dma_config: DMA configuration for SSP controller and peripheral + */ +struct pl022_config_chip { + struct device *dev; + enum ssp_loopback lbm; + enum ssp_interface iface; + enum ssp_hierarchy hierarchy; + bool slave_tx_disable; + struct ssp_clock_params clk_freq; + enum ssp_rx_endian endian_rx; + enum ssp_tx_endian endian_tx; + enum ssp_data_size data_size; + enum ssp_mode com_mode; + enum ssp_rx_level_trig rx_lev_trig; + enum ssp_tx_level_trig tx_lev_trig; + enum ssp_spi_clk_phase clk_phase; + enum ssp_spi_clk_pol clk_pol; + enum ssp_microwire_ctrl_len ctrl_len; + enum ssp_microwire_wait_state wait_state; + enum ssp_duplex duplex; + void (*cs_control) (u32 control); +}; + +#endif /* _SSP_PL022_H */ -- cgit v1.2.3 From df1a132bf3d3508f863336c80a27806a2ac947e0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2009 21:02:22 +0200 Subject: perf_counter: Introduce struct for sample data For easy extension of the sample data, put it in a structure. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 282d8cc48980..d8c0eb480f9a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -605,8 +605,14 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_counter_context *ctx, int cpu); extern void perf_counter_update_userpage(struct perf_counter *counter); -extern int perf_counter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs, u64 addr); +struct perf_sample_data { + struct pt_regs *regs; + u64 addr; +}; + +extern int perf_counter_overflow(struct perf_counter *counter, int nmi, + struct perf_sample_data *data); + /* * Return 1 for a software counter, 0 for a hardware counter */ -- cgit v1.2.3 From 9e350de37ac9607012fcf9c5314a28fbddf8f43c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2009 21:34:59 +0200 Subject: perf_counter: Accurate period data We currently log hw.sample_period for PERF_SAMPLE_PERIOD, however this is incorrect. When we adjust the period, it will only take effect the next cycle but report it for the current cycle. So when we adjust the period for every cycle, we're always wrong. Solve this by keeping track of the last_period. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d8c0eb480f9a..5b966472b458 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -366,6 +366,7 @@ struct hw_perf_counter { }; atomic64_t prev_count; u64 sample_period; + u64 last_period; atomic64_t period_left; u64 interrupts; @@ -606,8 +607,9 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, extern void perf_counter_update_userpage(struct perf_counter *counter); struct perf_sample_data { - struct pt_regs *regs; - u64 addr; + struct pt_regs *regs; + u64 addr; + u64 period; }; extern int perf_counter_overflow(struct perf_counter *counter, int nmi, -- cgit v1.2.3 From dcae3626d031fe6296b1e96a16f986193a41f840 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 10 Jun 2009 12:43:48 -0700 Subject: drm: fix LOCK_TEST_WITH_RETURN macro When this macro isn't called with 'file_priv' this will result in a build failure. Signed-off-by: Roel Kluin Signed-off-by: Andrew Morton Signed-off-by: Dave Airlie --- include/drm/drmP.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index b84d8ae35e6f..efa5f79a35c7 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -237,15 +237,15 @@ struct drm_device; * \param dev DRM device. * \param filp file pointer of the caller. */ -#define LOCK_TEST_WITH_RETURN( dev, file_priv ) \ -do { \ - if (!_DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock) || \ - file_priv->master->lock.file_priv != file_priv) { \ +#define LOCK_TEST_WITH_RETURN( dev, _file_priv ) \ +do { \ + if (!_DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock) || \ + _file_priv->master->lock.file_priv != _file_priv) { \ DRM_ERROR( "%s called without lock held, held %d owner %p %p\n",\ - __func__, _DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock),\ - file_priv->master->lock.file_priv, file_priv); \ - return -EINVAL; \ - } \ + __func__, _DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock),\ + _file_priv->master->lock.file_priv, _file_priv); \ + return -EINVAL; \ + } \ } while (0) /** -- cgit v1.2.3 From 4fefcb27050b98c97b1c32bc710fc2f874449dee Mon Sep 17 00:00:00 2001 From: yakui_zhao Date: Tue, 2 Jun 2009 14:09:47 +0800 Subject: drm: add separate drm debugging levels Now all the DRM debug info will be reported if the boot option of "drm.debug=1" is added. Sometimes it is inconvenient to get the debug info in KMS mode. We will get too much unrelated info. This will separate several DRM debug levels and the debug level can be used to print the different debug info. And the debug level is controlled by the module parameter of drm.debug In this patch it is divided into four debug levels; drm_core, drm_driver, drm_kms, drm_mode. At the same time we can get the different debug info by changing the debug level. This can be done by adding the module parameter. Of course it can be changed through the /sys/module/drm/parameters/debug after the system is booted. Four debug macro definitions are provided. DRM_DEBUG(fmt, args...) DRM_DEBUG_DRIVER(prefix, fmt, args...) DRM_DEBUG_KMS(prefix, fmt, args...) DRM_DEBUG_MODE(prefix, fmt, args...) When the boot option of "drm.debug=4" is added, it will print the debug info using DRM_DEBUG_KMS macro definition. When the boot option of "drm.debug=6" is added, it will print the debug info using DRM_DEBUG_KMS/DRM_DEBUG_DRIVER. Sometimes we expect to print the value of an array. For example: SDVO command, In such case the following four DRM debug macro definitions are added: DRM_LOG(fmt, args...) DRM_LOG_DRIVER(fmt, args...) DRM_LOG_KMS(fmt, args...) DRM_LOG_MODE(fmt, args...) Signed-off-by: Zhao Yakui Signed-off-by: Dave Airlie --- include/drm/drmP.h | 61 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index efa5f79a35c7..afc21685230e 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -87,6 +87,15 @@ struct drm_device; #include "drm_os_linux.h" #include "drm_hashtab.h" +#define DRM_UT_CORE 0x01 +#define DRM_UT_DRIVER 0x02 +#define DRM_UT_KMS 0x04 +#define DRM_UT_MODE 0x08 + +extern void drm_ut_debug_printk(unsigned int request_level, + const char *prefix, + const char *function_name, + const char *format, ...); /***********************************************************************/ /** \name DRM template customization defaults */ /*@{*/ @@ -186,15 +195,57 @@ struct drm_device; * \param arg arguments */ #if DRM_DEBUG_CODE -#define DRM_DEBUG(fmt, arg...) \ +#define DRM_DEBUG(fmt, args...) \ do { \ - if ( drm_debug ) \ - printk(KERN_DEBUG \ - "[" DRM_NAME ":%s] " fmt , \ - __func__ , ##arg); \ + drm_ut_debug_printk(DRM_UT_CORE, DRM_NAME, \ + __func__, fmt, ##args); \ + } while (0) + +#define DRM_DEBUG_DRIVER(prefix, fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_DRIVER, prefix, \ + __func__, fmt, ##args); \ + } while (0) +#define DRM_DEBUG_KMS(prefix, fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_KMS, prefix, \ + __func__, fmt, ##args); \ + } while (0) +#define DRM_DEBUG_MODE(prefix, fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_MODE, prefix, \ + __func__, fmt, ##args); \ + } while (0) +#define DRM_LOG(fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_CORE, NULL, \ + NULL, fmt, ##args); \ + } while (0) +#define DRM_LOG_KMS(fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_KMS, NULL, \ + NULL, fmt, ##args); \ + } while (0) +#define DRM_LOG_MODE(fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_MODE, NULL, \ + NULL, fmt, ##args); \ + } while (0) +#define DRM_LOG_DRIVER(fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_DRIVER, NULL, \ + NULL, fmt, ##args); \ } while (0) #else +#define DRM_DEBUG_DRIVER(prefix, fmt, args...) do { } while (0) +#define DRM_DEBUG_KMS(prefix, fmt, args...) do { } while (0) +#define DRM_DEBUG_MODE(prefix, fmt, args...) do { } while (0) #define DRM_DEBUG(fmt, arg...) do { } while (0) +#define DRM_LOG(fmt, arg...) do { } while (0) +#define DRM_LOG_KMS(fmt, args...) do { } while (0) +#define DRM_LOG_MODE(fmt, arg...) do { } while (0) +#define DRM_LOG_DRIVER(fmt, arg...) do { } while (0) + #endif #define DRM_PROC_LIMIT (PAGE_SIZE-80) -- cgit v1.2.3 From b0fd271d5fba0b2d00888363f3869e3f9b26caa9 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Thu, 11 Jun 2009 13:10:16 +0200 Subject: block: add request clone interface (v2) This patch adds the following 2 interfaces for request-stacking drivers: - blk_rq_prep_clone(struct request *clone, struct request *orig, struct bio_set *bs, gfp_t gfp_mask, int (*bio_ctr)(struct bio *, struct bio*, void *), void *data) * Clones bios in the original request to the clone request (bio_ctr is called for each cloned bios.) * Copies attributes of the original request to the clone request. The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. - blk_rq_unprep_clone(struct request *clone) * Frees cloned bios from the clone request. Request stacking drivers (e.g. request-based dm) need to make a clone request for a submitted request and dispatch it to other devices. To allocate request for the clone, request stacking drivers may not be able to use blk_get_request() because the allocation may be done in an irq-disabled context. So blk_rq_prep_clone() takes a request allocated by the caller as an argument. For each clone bio in the clone request, request stacking drivers should be able to set up their own completion handler. So blk_rq_prep_clone() takes a callback function which is called for each clone bio, and a pointer for private data which is passed to the callback. NOTE: blk_rq_prep_clone() doesn't copy any actual data of the original request. Pages are shared between original bios and cloned bios. So caller must not complete the original request before the clone request. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Boaz Harrosh Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5e740a135e73..ebdfde8fe556 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -765,6 +765,11 @@ extern void blk_insert_request(struct request_queue *, struct request *, int, vo extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); +extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, + struct bio_set *bs, gfp_t gfp_mask, + int (*bio_ctr)(struct bio *, struct bio *, void *), + void *data); +extern void blk_rq_unprep_clone(struct request *rq); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern void blk_plug_device(struct request_queue *); -- cgit v1.2.3 From 0764771dab80d7b84b9a271bee7f1b21a04a3f0c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 11:18:36 +0200 Subject: perf_counter: More paranoia settings Rename the perf_counter_priv knob to perf_counter_paranoia (because priv can be read as private, as opposed to privileged) and provide one more level: 0 - permissive 1 - restrict cpu counters to privilidged contexts 2 - restrict kernel-mode code counting and profiling Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5b966472b458..386be915baa1 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -648,7 +648,7 @@ struct perf_callchain_entry { extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); -extern int sysctl_perf_counter_priv; +extern int sysctl_perf_counter_paranoid; extern int sysctl_perf_counter_mlock; extern int sysctl_perf_counter_limit; -- cgit v1.2.3 From df58ab24bf26b166874bfb18b3b5a2e0a8e63179 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 11:25:05 +0200 Subject: perf_counter: Rename perf_counter_limit sysctl Rename perf_counter_limit to perf_counter_max_sample_rate and prohibit creation of counters with a known higher sample frequency. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 386be915baa1..95c797c480e8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -650,7 +650,7 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_paranoid; extern int sysctl_perf_counter_mlock; -extern int sysctl_perf_counter_limit; +extern int sysctl_perf_counter_sample_rate; extern void perf_counter_init(void); -- cgit v1.2.3 From fcc8ac1825d3d0fb81f73bc1a80ebc863168bb56 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:24:17 +0100 Subject: tty: Add carrier processing on close to the tty_port core Some drivers implement this internally, others miss it out. Push the behaviour into the core code as that way everyone will do it consistently. Update the dtr rts method to raise or lower depending upon flags. Having a single method in this style fits most of the implementations more cleanly than two funtions. We need this in place before we tackle the USB side Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index fc39db95499f..98694364c96f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -185,7 +185,7 @@ struct tty_port; struct tty_port_operations { /* Return 1 if the carrier is raised */ int (*carrier_raised)(struct tty_port *port); - void (*raise_dtr_rts)(struct tty_port *port); + void (*dtr_rts)(struct tty_port *port, int raise); }; struct tty_port { @@ -438,6 +438,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); +extern void tty_port_lower_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); extern int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); -- cgit v1.2.3 From 1ec739be75a6cb961a46ba0b1982d0edb7f27558 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:25:25 +0100 Subject: tty: Implement a drain delay in the tty port We need this for devices that cannot flush and wait, but which do not order data and modem events. Without it we will hang up before all the data clears the hardware. Needed for the USB changes. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index 98694364c96f..bed5a3d40307 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -201,6 +201,9 @@ struct tty_port { unsigned char *xmit_buf; /* Optional buffer */ int close_delay; /* Close port delay */ int closing_wait; /* Delay for output */ + int drain_delay; /* Set to zero if no pure time + based drain is needed else + set to size of fifo */ }; /* -- cgit v1.2.3 From 335f8514f200e63d689113d29cb7253a5c282967 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:26:29 +0100 Subject: tty: Bring the usb tty port structure into more use This allows us to clean stuff up, but is probably also going to cause some app breakage with buggy apps as we now implement proper POSIX behaviour for USB ports matching all the other ports. This does also mean other apps that break on USB will now work properly. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/usb/serial.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 625e9e4639c6..8cdfed738fe4 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -224,8 +224,7 @@ struct usb_serial_driver { /* Called by console with tty = NULL and by tty */ int (*open)(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); - void (*close)(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); + void (*close)(struct usb_serial_port *port); int (*write)(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); /* Called only by the tty layer */ @@ -241,6 +240,10 @@ struct usb_serial_driver { int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); + /* Called by the tty layer for port level work. There may or may not + be an attached tty at this point */ + void (*dtr_rts)(struct usb_serial_port *port, int on); + int (*carrier_raised)(struct usb_serial_port *port); /* USB events */ void (*read_int_callback)(struct urb *urb); void (*write_int_callback)(struct urb *urb); @@ -283,8 +286,7 @@ extern int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); extern int usb_serial_generic_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); -extern void usb_serial_generic_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +extern void usb_serial_generic_close(struct usb_serial_port *port); extern int usb_serial_generic_resume(struct usb_serial *serial); extern int usb_serial_generic_write_room(struct tty_struct *tty); extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty); -- cgit v1.2.3 From 97e87f8ebe978e881c7325ba490574bd5500b133 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 11 Jun 2009 12:29:27 +0100 Subject: tty: cyclades, plx9060 casts cleanup Remove ugly all-over-the-code casts of ctl_addr to 9060 space. Add an union to the cyclades_card structure, which contains a pointer to both 9050 and 9060 spaces. The 9050 space layout is unknown, so let it still as a void __iomem pointer. Signed-off-by: Jiri Slaby Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/cyclades.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index 788850ba4e75..9ae03d5b3590 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -507,16 +507,19 @@ struct ZFW_CTRL { /* Per card data structure */ struct cyclades_card { - void __iomem *base_addr; - void __iomem *ctl_addr; - int irq; - unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ - unsigned int first_line; /* minor number of first channel on card */ - unsigned int nports; /* Number of ports in the card */ - int bus_index; /* address shift - 0 for ISA, 1 for PCI */ - int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ - spinlock_t card_lock; - struct cyclades_port *ports; + void __iomem *base_addr; + union { + void __iomem *p9050; + struct RUNTIME_9060 __iomem *p9060; + } ctl_addr; + int irq; + unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ + unsigned int first_line; /* minor number of first channel on card */ + unsigned int nports; /* Number of ports in the card */ + int bus_index; /* address shift - 0 for ISA, 1 for PCI */ + int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ + spinlock_t card_lock; + struct cyclades_port *ports; }; /*************************************** -- cgit v1.2.3 From 101b81590d8df0a74c33cf739886247c0a13f4af Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 11 Jun 2009 12:30:10 +0100 Subject: tty: cyclades, cache HW version Store HW version locally to not read it all the time in interrupts and alike. Signed-off-by: Jiri Slaby Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/cyclades.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index 9ae03d5b3590..a14fe3079761 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -518,6 +518,7 @@ struct cyclades_card { unsigned int nports; /* Number of ports in the card */ int bus_index; /* address shift - 0 for ISA, 1 for PCI */ int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ + u32 hw_ver; spinlock_t card_lock; struct cyclades_port *ports; }; -- cgit v1.2.3 From 08a951e1693e324bd035b194002a82b9057fd9c5 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 11 Jun 2009 12:33:40 +0100 Subject: tty: cyclades, remove typedefs They are unused anyway. Signed-off-by: Jiri Slaby Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/cyclades.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index a14fe3079761..1fbdea4f08eb 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -142,19 +142,6 @@ struct CYZ_BOOT_CTRL { #ifndef DP_WINDOW_SIZE -/* #include "cyclomz.h" */ -/****************** ****************** *******************/ -/* - * The data types defined below are used in all ZFIRM interface - * data structures. They accomodate differences between HW - * architectures and compilers. - */ - -typedef __u64 ucdouble; /* 64 bits, unsigned */ -typedef __u32 uclong; /* 32 bits, unsigned */ -typedef __u16 ucshort; /* 16 bits, unsigned */ -typedef __u8 ucchar; /* 8 bits, unsigned */ - /* * Memory Window Sizes */ -- cgit v1.2.3 From 70beaed22cbe12979e55d99b370e147e2e168562 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 11 Jun 2009 12:39:12 +0100 Subject: serial: refactor ASYNC_ flags Define ASYNCB_* flags which are bit numbers of the ASYNC_* flags. This is useful for {test,set,clear}_bit. Also convert each ASYNC_% to be (1 << ASYNCB_%) and define masks with the macros, not constants. Tested with: #include "PATH_TO_KERNEL/include/linux/serial.h" static struct { unsigned int new, old; } as[] = { { ASYNC_HUP_NOTIFY, 0x0001 }, { ASYNC_FOURPORT, 0x0002 }, ... { ASYNC_BOOT_ONLYMCA, 0x00400000 }, { ASYNC_INTERNAL_FLAGS, 0xFFC00000 } }; ... for (a = 0; a < ARRAY_SIZE(as); a++) if (as[a].old != as[a].new) printf("%.8x != %.8x\n", as[a].old, as[a].new); Signed-off-by: Jiri Slaby Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/serial.h | 116 +++++++++++++++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/serial.h b/include/linux/serial.h index 9136cc5608c3..e5bb75a63802 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -96,54 +96,76 @@ struct serial_uart_config { /* * Definitions for async_struct (and serial_struct) flags field + * + * Define ASYNCB_* for convenient use with {test,set,clear}_bit. */ -#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes - on the callout port */ -#define ASYNC_FOURPORT 0x0002 /* Set OU1, OUT2 per AST Fourport settings */ -#define ASYNC_SAK 0x0004 /* Secure Attention Key (Orange book) */ -#define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */ - -#define ASYNC_SPD_MASK 0x1030 -#define ASYNC_SPD_HI 0x0010 /* Use 56000 instead of 38400 bps */ - -#define ASYNC_SPD_VHI 0x0020 /* Use 115200 instead of 38400 bps */ -#define ASYNC_SPD_CUST 0x0030 /* Use user-specified divisor */ - -#define ASYNC_SKIP_TEST 0x0040 /* Skip UART test during autoconfiguration */ -#define ASYNC_AUTO_IRQ 0x0080 /* Do automatic IRQ during autoconfiguration */ -#define ASYNC_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */ -#define ASYNC_PGRP_LOCKOUT 0x0200 /* Lock out cua opens based on pgrp */ -#define ASYNC_CALLOUT_NOHUP 0x0400 /* Don't do hangups for cua device */ - -#define ASYNC_HARDPPS_CD 0x0800 /* Call hardpps when CD goes high */ - -#define ASYNC_SPD_SHI 0x1000 /* Use 230400 instead of 38400 bps */ -#define ASYNC_SPD_WARP 0x1010 /* Use 460800 instead of 38400 bps */ - -#define ASYNC_LOW_LATENCY 0x2000 /* Request low latency behaviour */ - -#define ASYNC_BUGGY_UART 0x4000 /* This is a buggy UART, skip some safety - * checks. Note: can be dangerous! */ - -#define ASYNC_AUTOPROBE 0x8000 /* Port was autoprobed by PCI or PNP code */ - -#define ASYNC_FLAGS 0x7FFF /* Possible legal async flags */ -#define ASYNC_USR_MASK 0x3430 /* Legal flags that non-privileged - * users can set or reset */ - -/* Internal flags used only by kernel/chr_drv/serial.c */ -#define ASYNC_INITIALIZED 0x80000000 /* Serial port was initialized */ -#define ASYNC_NORMAL_ACTIVE 0x20000000 /* Normal device is active */ -#define ASYNC_BOOT_AUTOCONF 0x10000000 /* Autoconfigure port on bootup */ -#define ASYNC_CLOSING 0x08000000 /* Serial port is closing */ -#define ASYNC_CTS_FLOW 0x04000000 /* Do CTS flow control */ -#define ASYNC_CHECK_CD 0x02000000 /* i.e., CLOCAL */ -#define ASYNC_SHARE_IRQ 0x01000000 /* for multifunction cards - --- no longer used */ -#define ASYNC_CONS_FLOW 0x00800000 /* flow control for console */ - -#define ASYNC_BOOT_ONLYMCA 0x00400000 /* Probe only if MCA bus */ -#define ASYNC_INTERNAL_FLAGS 0xFFC00000 /* Internal flags */ +#define ASYNCB_HUP_NOTIFY 0 /* Notify getty on hangups and closes + * on the callout port */ +#define ASYNCB_FOURPORT 1 /* Set OU1, OUT2 per AST Fourport settings */ +#define ASYNCB_SAK 2 /* Secure Attention Key (Orange book) */ +#define ASYNCB_SPLIT_TERMIOS 3 /* Separate termios for dialin/callout */ +#define ASYNCB_SPD_HI 4 /* Use 56000 instead of 38400 bps */ +#define ASYNCB_SPD_VHI 5 /* Use 115200 instead of 38400 bps */ +#define ASYNCB_SKIP_TEST 6 /* Skip UART test during autoconfiguration */ +#define ASYNCB_AUTO_IRQ 7 /* Do automatic IRQ during + * autoconfiguration */ +#define ASYNCB_SESSION_LOCKOUT 8 /* Lock out cua opens based on session */ +#define ASYNCB_PGRP_LOCKOUT 9 /* Lock out cua opens based on pgrp */ +#define ASYNCB_CALLOUT_NOHUP 10 /* Don't do hangups for cua device */ +#define ASYNCB_HARDPPS_CD 11 /* Call hardpps when CD goes high */ +#define ASYNCB_SPD_SHI 12 /* Use 230400 instead of 38400 bps */ +#define ASYNCB_LOW_LATENCY 13 /* Request low latency behaviour */ +#define ASYNCB_BUGGY_UART 14 /* This is a buggy UART, skip some safety + * checks. Note: can be dangerous! */ +#define ASYNCB_AUTOPROBE 15 /* Port was autoprobed by PCI or PNP code */ +#define ASYNCB_LAST_USER 15 + +/* Internal flags used only by kernel */ +#define ASYNCB_INITIALIZED 31 /* Serial port was initialized */ +#define ASYNCB_NORMAL_ACTIVE 29 /* Normal device is active */ +#define ASYNCB_BOOT_AUTOCONF 28 /* Autoconfigure port on bootup */ +#define ASYNCB_CLOSING 27 /* Serial port is closing */ +#define ASYNCB_CTS_FLOW 26 /* Do CTS flow control */ +#define ASYNCB_CHECK_CD 25 /* i.e., CLOCAL */ +#define ASYNCB_SHARE_IRQ 24 /* for multifunction cards, no longer used */ +#define ASYNCB_CONS_FLOW 23 /* flow control for console */ +#define ASYNCB_BOOT_ONLYMCA 22 /* Probe only if MCA bus */ +#define ASYNCB_FIRST_KERNEL 22 + +#define ASYNC_HUP_NOTIFY (1U << ASYNCB_HUP_NOTIFY) +#define ASYNC_FOURPORT (1U << ASYNCB_FOURPORT) +#define ASYNC_SAK (1U << ASYNCB_SAK) +#define ASYNC_SPLIT_TERMIOS (1U << ASYNCB_SPLIT_TERMIOS) +#define ASYNC_SPD_HI (1U << ASYNCB_SPD_HI) +#define ASYNC_SPD_VHI (1U << ASYNCB_SPD_VHI) +#define ASYNC_SKIP_TEST (1U << ASYNCB_SKIP_TEST) +#define ASYNC_AUTO_IRQ (1U << ASYNCB_AUTO_IRQ) +#define ASYNC_SESSION_LOCKOUT (1U << ASYNCB_SESSION_LOCKOUT) +#define ASYNC_PGRP_LOCKOUT (1U << ASYNCB_PGRP_LOCKOUT) +#define ASYNC_CALLOUT_NOHUP (1U << ASYNCB_CALLOUT_NOHUP) +#define ASYNC_HARDPPS_CD (1U << ASYNCB_HARDPPS_CD) +#define ASYNC_SPD_SHI (1U << ASYNCB_SPD_SHI) +#define ASYNC_LOW_LATENCY (1U << ASYNCB_LOW_LATENCY) +#define ASYNC_BUGGY_UART (1U << ASYNCB_BUGGY_UART) +#define ASYNC_AUTOPROBE (1U << ASYNCB_AUTOPROBE) + +#define ASYNC_FLAGS ((1U << ASYNCB_LAST_USER) - 1) +#define ASYNC_USR_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI| \ + ASYNC_CALLOUT_NOHUP|ASYNC_SPD_SHI|ASYNC_LOW_LATENCY) +#define ASYNC_SPD_CUST (ASYNC_SPD_HI|ASYNC_SPD_VHI) +#define ASYNC_SPD_WARP (ASYNC_SPD_HI|ASYNC_SPD_SHI) +#define ASYNC_SPD_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI|ASYNC_SPD_SHI) + +#define ASYNC_INITIALIZED (1U << ASYNCB_INITIALIZED) +#define ASYNC_NORMAL_ACTIVE (1U << ASYNCB_NORMAL_ACTIVE) +#define ASYNC_BOOT_AUTOCONF (1U << ASYNCB_BOOT_AUTOCONF) +#define ASYNC_CLOSING (1U << ASYNCB_CLOSING) +#define ASYNC_CTS_FLOW (1U << ASYNCB_CTS_FLOW) +#define ASYNC_CHECK_CD (1U << ASYNCB_CHECK_CD) +#define ASYNC_SHARE_IRQ (1U << ASYNCB_SHARE_IRQ) +#define ASYNC_CONS_FLOW (1U << ASYNCB_CONS_FLOW) +#define ASYNC_BOOT_ONLYMCA (1U << ASYNCB_BOOT_ONLYMCA) +#define ASYNC_INTERNAL_FLAGS (~((1U << ASYNCB_FIRST_KERNEL) - 1)) /* * Multiport serial configuration structure --- external structure -- cgit v1.2.3 From 70fd8fdecc4430ffcede7704dd812d4054d1faf9 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 11 Jun 2009 12:41:57 +0100 Subject: 8250_pci: add the OXCB950 chip to the 8250 PCI driver. This adds support for the following serial controller chip: Oxford Semiconductor OXCB950 for PCI Cardbus interface http://www.transdimension.com/products/serial/OXCB950.html on this card: ExSys EX-1370 1 port high-speed serial card for ExpressCard/34 slot Signed-off-by: Andre Przywara Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0f71812d67d3..d7d1c41a0b17 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1996,10 +1996,12 @@ #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U 0xC118 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU 0xC11C #define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501 +#define PCI_DEVICE_ID_OXSEMI_C950 0x950B #define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513 #define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP 0x9523 +#define PCI_SUBDEVICE_ID_OXSEMI_C950 0x0001 #define PCI_VENDOR_ID_CHELSIO 0x1425 -- cgit v1.2.3 From 38db89799bdf11625a831c5af33938dcb11908b6 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:44:17 +0100 Subject: tty: throttling race fix The tty throttling code can race due to the lock drops. It takes very high loads but this has been observed and verified by Rob Duncan. The basic problem is that on an SMP box we can go CPU #1 CPU #2 need to throttle ? suppose we should buffer space cleared are we throttled yes ? - unthrottle call throttle method This changeet take the termios lock to protect against this. The termios lock isn't the initial obvious candidate but many implementations of throttle methods already need to poke around their own termios structures (and nobody really locks them against a racing change of flow control). This does mean that anyone who is setting tty->low_latency = 1 and then calling tty_flip_buffer_push from their unthrottle method is going to end up collapsing in a pile of locks. However we've removed all the known bogus users of low_latency = 1 and such use isn't safe anyway for other reasons so catching it would be an improvement. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty_driver.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index bcba84ea2d86..3566129384a4 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -127,7 +127,8 @@ * the line discipline are close to full, and it should somehow * signal that no more characters should be sent to the tty. * - * Optional: Always invoke via tty_throttle(); + * Optional: Always invoke via tty_throttle(), called under the + * termios lock. * * void (*unthrottle)(struct tty_struct * tty); * @@ -135,7 +136,8 @@ * that characters can now be sent to the tty without fear of * overrunning the input buffers of the line disciplines. * - * Optional: Always invoke via tty_unthrottle(); + * Optional: Always invoke via tty_unthrottle(), called under the + * termios lock. * * void (*stop)(struct tty_struct *tty); * -- cgit v1.2.3 From e8b70e7d3e86319a8b2aaabde3866833d92cd80f Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:48:02 +0100 Subject: tty: Extract various bits of ldisc code Before trying to tackle the ldisc bugs the code needs to be a good deal more readable, so do the simple extractions of routines first. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index bed5a3d40307..f9c13c83790c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -428,6 +428,9 @@ extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx, extern void tty_release_dev(struct file *filp); extern int tty_init_termios(struct tty_struct *tty); +extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); +extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); + extern struct mutex tty_mutex; extern void tty_write_unlock(struct tty_struct *tty); -- cgit v1.2.3 From c65c9bc3efa5589f691276bb9db689119a711222 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:50:12 +0100 Subject: tty: rewrite the ldisc locking There are several pretty much unfixable races in the old ldisc code, especially with respect to pty behaviour and also to hangup. It's easier to rewrite the code than simply try and patch it up. This patch - splits the ldisc from the tty (so we will be able to refcount it more cleanly later) - introduces a mutex lock for ldisc changing on an active device - fixes the complete mess that hangup caused - implements hopefully correct setldisc/close/hangup locking There are still some problems around pty pairs that have always been there but at least it is now possible to understand the code and fix further problems. This fixes the following known bugs - hang up can leak ldisc references - hang up may not call open/close on ldisc in a matched way - pty/tty pairs can deadlock during an ldisc change - reading the ldisc proc files can cause every ldisc to be loaded and probably a few other of the mysterious ldisc race reports. I'm sure it also adds the odd new one. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index f9c13c83790c..1488d8c81aac 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -226,8 +226,11 @@ struct tty_struct { struct tty_driver *driver; const struct tty_operations *ops; int index; - /* The ldisc objects are protected by tty_ldisc_lock at the moment */ - struct tty_ldisc ldisc; + + /* Protects ldisc changes: Lock tty not pty */ + struct mutex ldisc_mutex; + struct tty_ldisc *ldisc; + struct mutex termios_mutex; spinlock_t ctrl_lock; /* Termios values are protected by the termios mutex */ @@ -314,6 +317,7 @@ struct tty_struct { #define TTY_CLOSING 7 /* ->close() in progress */ #define TTY_LDISC 9 /* Line discipline attached */ #define TTY_LDISC_CHANGING 10 /* Line discipline changing */ +#define TTY_LDISC_OPEN 11 /* Line discipline is open */ #define TTY_HW_COOK_OUT 14 /* Hardware can do output cooking */ #define TTY_HW_COOK_IN 15 /* Hardware can do input cooking */ #define TTY_PTY_LOCK 16 /* pty private */ @@ -406,6 +410,7 @@ extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); +extern void tty_ldisc_hangup(struct tty_struct *tty); extern const struct file_operations tty_ldiscs_proc_fops; extern void tty_wakeup(struct tty_struct *tty); -- cgit v1.2.3 From 08e0992f60ad44025a8a8b8a821838ca4a562686 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 11 Jun 2009 13:21:24 +0100 Subject: serial: add support for the TI AR7 internal UART This patch adds support for the TI AR7 internal UART. Signed-off-by: Florian Fainelli Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/serial_core.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 57a97e52e58d..48766ea845cf 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -41,7 +41,8 @@ #define PORT_XSCALE 15 #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ -#define PORT_MAX_8250 17 /* max port ID */ +#define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ +#define PORT_MAX_8250 18 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed -- cgit v1.2.3 From 34aec591847c696339189b070cce2a11f901cfea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20R=C3=B6jfors?= Date: Thu, 11 Jun 2009 14:05:39 +0100 Subject: serial: Added Timberdale UART driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver for the UART found in the Timberdale FPGA Signed-off-by: Richard Röjfors Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 48766ea845cf..6fd80c4243f1 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -168,6 +168,9 @@ /* MAX3100 */ #define PORT_MAX3100 86 +/* Timberdale UART */ +#define PORT_TIMBUART 87 + #ifdef __KERNEL__ #include -- cgit v1.2.3 From 8759ef32d992fc6c0bcbe40fca7aa302190918a5 Mon Sep 17 00:00:00 2001 From: Oskar Schirmer Date: Thu, 11 Jun 2009 14:51:15 +0100 Subject: lib: isolate rational fractions helper function Provide a helper function to determine optimum numerator denominator value pairs taking into account restricted register size. Useful especially with PLL and other clock configurations. Signed-off-by: Oskar Schirmer Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/rational.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/rational.h (limited to 'include') diff --git a/include/linux/rational.h b/include/linux/rational.h new file mode 100644 index 000000000000..4f532fcd9eea --- /dev/null +++ b/include/linux/rational.h @@ -0,0 +1,19 @@ +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer + * + * helper functions when coping with rational numbers, + * e.g. when calculating optimum numerator/denominator pairs for + * pll configuration taking into account restricted register size + */ + +#ifndef _LINUX_RATIONAL_H +#define _LINUX_RATIONAL_H + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator); + +#endif /* _LINUX_RATIONAL_H */ -- cgit v1.2.3 From 1c432d899d32d36371ee4ee310fa3609cf0e5742 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 13:19:29 +0200 Subject: perf_counter: Rename enums Rename the perf enums to be in the 'perf_' namespace and strictly enumerate the ABI bits. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 53 +++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 95c797c480e8..d5911b02bc8c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -24,24 +24,21 @@ /* * attr.type */ -enum perf_event_types { +enum perf_type_id { PERF_TYPE_HARDWARE = 0, PERF_TYPE_SOFTWARE = 1, PERF_TYPE_TRACEPOINT = 2, PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, - /* - * available TYPE space, raw is the max value. - */ - - PERF_TYPE_RAW = 128, + PERF_TYPE_MAX, /* non ABI */ }; /* * Generalized performance counter event types, used by the attr.event_id * parameter of the sys_perf_counter_open() syscall: */ -enum attr_ids { +enum perf_hw_id { /* * Common hardware events, generalized by the kernel: */ @@ -53,7 +50,7 @@ enum attr_ids { PERF_COUNT_BRANCH_MISSES = 5, PERF_COUNT_BUS_CYCLES = 6, - PERF_HW_EVENTS_MAX = 7, + PERF_HW_EVENTS_MAX, /* non ABI */ }; /* @@ -63,30 +60,30 @@ enum attr_ids { * { read, write, prefetch } x * { accesses, misses } */ -enum hw_cache_id { - PERF_COUNT_HW_CACHE_L1D, - PERF_COUNT_HW_CACHE_L1I, - PERF_COUNT_HW_CACHE_L2, - PERF_COUNT_HW_CACHE_DTLB, - PERF_COUNT_HW_CACHE_ITLB, - PERF_COUNT_HW_CACHE_BPU, - - PERF_COUNT_HW_CACHE_MAX, +enum perf_hw_cache_id { + PERF_COUNT_HW_CACHE_L1D = 0, + PERF_COUNT_HW_CACHE_L1I = 1, + PERF_COUNT_HW_CACHE_L2 = 2, + PERF_COUNT_HW_CACHE_DTLB = 3, + PERF_COUNT_HW_CACHE_ITLB = 4, + PERF_COUNT_HW_CACHE_BPU = 5, + + PERF_COUNT_HW_CACHE_MAX, /* non ABI */ }; -enum hw_cache_op_id { - PERF_COUNT_HW_CACHE_OP_READ, - PERF_COUNT_HW_CACHE_OP_WRITE, - PERF_COUNT_HW_CACHE_OP_PREFETCH, +enum perf_hw_cache_op_id { + PERF_COUNT_HW_CACHE_OP_READ = 0, + PERF_COUNT_HW_CACHE_OP_WRITE = 1, + PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, - PERF_COUNT_HW_CACHE_OP_MAX, + PERF_COUNT_HW_CACHE_OP_MAX, /* non ABI */ }; -enum hw_cache_op_result_id { - PERF_COUNT_HW_CACHE_RESULT_ACCESS, - PERF_COUNT_HW_CACHE_RESULT_MISS, +enum perf_hw_cache_op_result_id { + PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, + PERF_COUNT_HW_CACHE_RESULT_MISS = 1, - PERF_COUNT_HW_CACHE_RESULT_MAX, + PERF_COUNT_HW_CACHE_RESULT_MAX, /* non ABI */ }; /* @@ -95,7 +92,7 @@ enum hw_cache_op_result_id { * physical and sw events of the kernel (and allow the profiling of them as * well): */ -enum sw_event_ids { +enum perf_sw_ids { PERF_COUNT_CPU_CLOCK = 0, PERF_COUNT_TASK_CLOCK = 1, PERF_COUNT_PAGE_FAULTS = 2, @@ -104,7 +101,7 @@ enum sw_event_ids { PERF_COUNT_PAGE_FAULTS_MIN = 5, PERF_COUNT_PAGE_FAULTS_MAJ = 6, - PERF_SW_EVENTS_MAX = 7, + PERF_SW_EVENTS_MAX, /* non ABI */ }; /* -- cgit v1.2.3 From f4dbfa8f3131a84257223393905f7efad0ca5996 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 14:06:28 +0200 Subject: perf_counter: Standardize event names Pure renames only, to PERF_COUNT_HW_* and PERF_COUNT_SW_*. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d5911b02bc8c..887df88a9c2a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -42,15 +42,15 @@ enum perf_hw_id { /* * Common hardware events, generalized by the kernel: */ - PERF_COUNT_CPU_CYCLES = 0, - PERF_COUNT_INSTRUCTIONS = 1, - PERF_COUNT_CACHE_REFERENCES = 2, - PERF_COUNT_CACHE_MISSES = 3, - PERF_COUNT_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_BRANCH_MISSES = 5, - PERF_COUNT_BUS_CYCLES = 6, - - PERF_HW_EVENTS_MAX, /* non ABI */ + PERF_COUNT_HW_CPU_CYCLES = 0, + PERF_COUNT_HW_INSTRUCTIONS = 1, + PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_MISSES = 3, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BUS_CYCLES = 6, + + PERF_COUNT_HW_MAX, /* non ABI */ }; /* @@ -93,15 +93,15 @@ enum perf_hw_cache_op_result_id { * well): */ enum perf_sw_ids { - PERF_COUNT_CPU_CLOCK = 0, - PERF_COUNT_TASK_CLOCK = 1, - PERF_COUNT_PAGE_FAULTS = 2, - PERF_COUNT_CONTEXT_SWITCHES = 3, - PERF_COUNT_CPU_MIGRATIONS = 4, - PERF_COUNT_PAGE_FAULTS_MIN = 5, - PERF_COUNT_PAGE_FAULTS_MAJ = 6, - - PERF_SW_EVENTS_MAX, /* non ABI */ + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + + PERF_COUNT_SW_MAX, /* non ABI */ }; /* -- cgit v1.2.3 From 8be6e8f3c3a13900169f1141870562d0c723b010 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 14:19:11 +0200 Subject: perf_counter: Rename L2 to LL cache The top (fastest) and last level (biggest) caches are the most interesting ones, performance wise. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: [ Fixed the Nehalem LL table to LLC Reference/Miss events ] Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 887df88a9c2a..20cf5af27ade 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -56,14 +56,14 @@ enum perf_hw_id { /* * Generalized hardware cache counters: * - * { L1-D, L1-I, L2, LLC, ITLB, DTLB, BPU } x + * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x * { read, write, prefetch } x * { accesses, misses } */ enum perf_hw_cache_id { PERF_COUNT_HW_CACHE_L1D = 0, PERF_COUNT_HW_CACHE_L1I = 1, - PERF_COUNT_HW_CACHE_L2 = 2, + PERF_COUNT_HW_CACHE_LL = 2, PERF_COUNT_HW_CACHE_DTLB = 3, PERF_COUNT_HW_CACHE_ITLB = 4, PERF_COUNT_HW_CACHE_BPU = 5, -- cgit v1.2.3 From a308444ceb576d3089f9ca0dfd097eba6f1e623f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Jun 2009 14:44:26 +0200 Subject: perf_counter: Better align code Whitespace and comment bits. Also update copyrights. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: --- include/linux/perf_counter.h | 165 ++++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 80 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 20cf5af27ade..1fa1a26cb1b3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -1,12 +1,13 @@ /* * Performance counters: * - * Copyright(C) 2008, Thomas Gleixner - * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2009, Thomas Gleixner + * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra * * Data type definitions, declarations, prototypes. * - * Started by: Thomas Gleixner and Ingo Molnar + * Started by: Thomas Gleixner and Ingo Molnar * * For licencing details see kernel-base/COPYING */ @@ -25,18 +26,19 @@ * attr.type */ enum perf_type_id { - PERF_TYPE_HARDWARE = 0, - PERF_TYPE_SOFTWARE = 1, - PERF_TYPE_TRACEPOINT = 2, - PERF_TYPE_HW_CACHE = 3, - PERF_TYPE_RAW = 4, + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, - PERF_TYPE_MAX, /* non ABI */ + PERF_TYPE_MAX, /* non-ABI */ }; /* - * Generalized performance counter event types, used by the attr.event_id - * parameter of the sys_perf_counter_open() syscall: + * Generalized performance counter event types, used by the + * attr.event_id parameter of the sys_perf_counter_open() + * syscall: */ enum perf_hw_id { /* @@ -50,7 +52,7 @@ enum perf_hw_id { PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, - PERF_COUNT_HW_MAX, /* non ABI */ + PERF_COUNT_HW_MAX, /* non-ABI */ }; /* @@ -61,29 +63,29 @@ enum perf_hw_id { * { accesses, misses } */ enum perf_hw_cache_id { - PERF_COUNT_HW_CACHE_L1D = 0, - PERF_COUNT_HW_CACHE_L1I = 1, - PERF_COUNT_HW_CACHE_LL = 2, - PERF_COUNT_HW_CACHE_DTLB = 3, - PERF_COUNT_HW_CACHE_ITLB = 4, - PERF_COUNT_HW_CACHE_BPU = 5, - - PERF_COUNT_HW_CACHE_MAX, /* non ABI */ + PERF_COUNT_HW_CACHE_L1D = 0, + PERF_COUNT_HW_CACHE_L1I = 1, + PERF_COUNT_HW_CACHE_LL = 2, + PERF_COUNT_HW_CACHE_DTLB = 3, + PERF_COUNT_HW_CACHE_ITLB = 4, + PERF_COUNT_HW_CACHE_BPU = 5, + + PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ }; enum perf_hw_cache_op_id { - PERF_COUNT_HW_CACHE_OP_READ = 0, - PERF_COUNT_HW_CACHE_OP_WRITE = 1, - PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, + PERF_COUNT_HW_CACHE_OP_READ = 0, + PERF_COUNT_HW_CACHE_OP_WRITE = 1, + PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, - PERF_COUNT_HW_CACHE_OP_MAX, /* non ABI */ + PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ }; enum perf_hw_cache_op_result_id { PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, PERF_COUNT_HW_CACHE_RESULT_MISS = 1, - PERF_COUNT_HW_CACHE_RESULT_MAX, /* non ABI */ + PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ }; /* @@ -93,15 +95,15 @@ enum perf_hw_cache_op_result_id { * well): */ enum perf_sw_ids { - PERF_COUNT_SW_CPU_CLOCK = 0, - PERF_COUNT_SW_TASK_CLOCK = 1, - PERF_COUNT_SW_PAGE_FAULTS = 2, - PERF_COUNT_SW_CONTEXT_SWITCHES = 3, - PERF_COUNT_SW_CPU_MIGRATIONS = 4, - PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, - PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, - - PERF_COUNT_SW_MAX, /* non ABI */ + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + + PERF_COUNT_SW_MAX, /* non-ABI */ }; /* @@ -109,15 +111,15 @@ enum perf_sw_ids { * in the overflow packets. */ enum perf_counter_sample_format { - PERF_SAMPLE_IP = 1U << 0, - PERF_SAMPLE_TID = 1U << 1, - PERF_SAMPLE_TIME = 1U << 2, - PERF_SAMPLE_ADDR = 1U << 3, - PERF_SAMPLE_GROUP = 1U << 4, - PERF_SAMPLE_CALLCHAIN = 1U << 5, - PERF_SAMPLE_ID = 1U << 6, - PERF_SAMPLE_CPU = 1U << 7, - PERF_SAMPLE_PERIOD = 1U << 8, + PERF_SAMPLE_IP = 1U << 0, + PERF_SAMPLE_TID = 1U << 1, + PERF_SAMPLE_TIME = 1U << 2, + PERF_SAMPLE_ADDR = 1U << 3, + PERF_SAMPLE_GROUP = 1U << 4, + PERF_SAMPLE_CALLCHAIN = 1U << 5, + PERF_SAMPLE_ID = 1U << 6, + PERF_SAMPLE_CPU = 1U << 7, + PERF_SAMPLE_PERIOD = 1U << 8, }; /* @@ -126,9 +128,9 @@ enum perf_counter_sample_format { * in increasing order of bit value, after the counter value. */ enum perf_counter_read_format { - PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, - PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, - PERF_FORMAT_ID = 1U << 2, + PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, + PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, + PERF_FORMAT_ID = 1U << 2, }; /* @@ -229,12 +231,12 @@ struct perf_counter_mmap_page { __u64 data_head; /* head in the data section */ }; -#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) -#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (2 << 0) -#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) -#define PERF_EVENT_MISC_OVERFLOW (1 << 2) +#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) +#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_EVENT_MISC_KERNEL (1 << 0) +#define PERF_EVENT_MISC_USER (2 << 0) +#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) +#define PERF_EVENT_MISC_OVERFLOW (1 << 2) struct perf_event_header { __u32 type; @@ -351,14 +353,14 @@ struct hw_perf_counter { #ifdef CONFIG_PERF_COUNTERS union { struct { /* hardware */ - u64 config; - unsigned long config_base; - unsigned long counter_base; - int idx; + u64 config; + unsigned long config_base; + unsigned long counter_base; + int idx; }; union { /* software */ - atomic64_t count; - struct hrtimer hrtimer; + atomic64_t count; + struct hrtimer hrtimer; }; }; atomic64_t prev_count; @@ -523,37 +525,37 @@ struct perf_counter_context { * Protect the states of the counters in the list, * nr_active, and the list: */ - spinlock_t lock; + spinlock_t lock; /* * Protect the list of counters. Locking either mutex or lock * is sufficient to ensure the list doesn't change; to change * the list you need to lock both the mutex and the spinlock. */ - struct mutex mutex; + struct mutex mutex; - struct list_head counter_list; - struct list_head event_list; - int nr_counters; - int nr_active; - int is_active; - atomic_t refcount; - struct task_struct *task; + struct list_head counter_list; + struct list_head event_list; + int nr_counters; + int nr_active; + int is_active; + atomic_t refcount; + struct task_struct *task; /* * Context clock, runs when context enabled. */ - u64 time; - u64 timestamp; + u64 time; + u64 timestamp; /* * These fields let us detect when two contexts have both * been cloned (inherited) from a common ancestor. */ - struct perf_counter_context *parent_ctx; - u64 parent_gen; - u64 generation; - int pin_count; - struct rcu_head rcu_head; + struct perf_counter_context *parent_ctx; + u64 parent_gen; + u64 generation; + int pin_count; + struct rcu_head rcu_head; }; /** @@ -604,9 +606,9 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, extern void perf_counter_update_userpage(struct perf_counter *counter); struct perf_sample_data { - struct pt_regs *regs; - u64 addr; - u64 period; + struct pt_regs *regs; + u64 addr; + u64 period; }; extern int perf_counter_overflow(struct perf_counter *counter, int nmi, @@ -636,11 +638,14 @@ extern void perf_counter_fork(struct task_struct *tsk); extern void perf_counter_task_migration(struct task_struct *task, int cpu); -#define MAX_STACK_DEPTH 255 +#define MAX_STACK_DEPTH 255 struct perf_callchain_entry { - u16 nr, hv, kernel, user; - u64 ip[MAX_STACK_DEPTH]; + u16 nr; + u16 hv; + u16 kernel; + u16 user; + u64 ip[MAX_STACK_DEPTH]; }; extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); -- cgit v1.2.3 From cca3f454a85ff42d426401bce7ac804541b2bd03 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 14:57:55 +0200 Subject: perf_counter: Add counter->id to the throttle event So as to be able to distuinguish between multiple counters. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1fa1a26cb1b3..6e133954e2e4 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -286,6 +286,7 @@ enum perf_event_type { * struct { * struct perf_event_header header; * u64 time; + * u64 id; * }; */ PERF_EVENT_THROTTLE = 5, -- cgit v1.2.3 From 3c7b4e6b8be4c16f1e6e5c558e33b7ff0db2dfaf Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 11 Jun 2009 13:22:39 +0100 Subject: kmemleak: Add the base support This patch adds the base support for the kernel memory leak detector. It traces the memory allocation/freeing in a way similar to the Boehm's conservative garbage collector, the difference being that the unreferenced objects are not freed but only shown in /sys/kernel/debug/kmemleak. Enabling this feature introduces an overhead to memory allocations. Signed-off-by: Catalin Marinas Cc: Ingo Molnar Acked-by: Pekka Enberg Cc: Andrew Morton Reviewed-by: Paul E. McKenney --- include/linux/kmemleak.h | 96 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 include/linux/kmemleak.h (limited to 'include') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h new file mode 100644 index 000000000000..7796aed6cdd5 --- /dev/null +++ b/include/linux/kmemleak.h @@ -0,0 +1,96 @@ +/* + * include/linux/kmemleak.h + * + * Copyright (C) 2008 ARM Limited + * Written by Catalin Marinas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __KMEMLEAK_H +#define __KMEMLEAK_H + +#ifdef CONFIG_DEBUG_KMEMLEAK + +extern void kmemleak_init(void); +extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, + gfp_t gfp); +extern void kmemleak_free(const void *ptr); +extern void kmemleak_padding(const void *ptr, unsigned long offset, + size_t size); +extern void kmemleak_not_leak(const void *ptr); +extern void kmemleak_ignore(const void *ptr); +extern void kmemleak_scan_area(const void *ptr, unsigned long offset, + size_t length, gfp_t gfp); +extern void kmemleak_no_scan(const void *ptr); + +static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, + int min_count, unsigned long flags, + gfp_t gfp) +{ + if (!(flags & SLAB_NOLEAKTRACE)) + kmemleak_alloc(ptr, size, min_count, gfp); +} + +static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) +{ + if (!(flags & SLAB_NOLEAKTRACE)) + kmemleak_free(ptr); +} + +static inline void kmemleak_erase(void **ptr) +{ + *ptr = NULL; +} + +#else + +static inline void kmemleak_init(void) +{ +} +static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count, + gfp_t gfp) +{ +} +static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, + int min_count, unsigned long flags, + gfp_t gfp) +{ +} +static inline void kmemleak_free(const void *ptr) +{ +} +static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) +{ +} +static inline void kmemleak_not_leak(const void *ptr) +{ +} +static inline void kmemleak_ignore(const void *ptr) +{ +} +static inline void kmemleak_scan_area(const void *ptr, unsigned long offset, + size_t length, gfp_t gfp) +{ +} +static inline void kmemleak_erase(void **ptr) +{ +} +static inline void kmemleak_no_scan(const void *ptr) +{ +} + +#endif /* CONFIG_DEBUG_KMEMLEAK */ + +#endif /* __KMEMLEAK_H */ -- cgit v1.2.3 From d5cff635290aec9ad7e6ee546aa4fae895361cbb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 11 Jun 2009 13:22:40 +0100 Subject: kmemleak: Add the slab memory allocation/freeing hooks This patch adds the callbacks to kmemleak_(alloc|free) functions from the slab allocator. The patch also adds the SLAB_NOLEAKTRACE flag to avoid recursive calls to kmemleak when it allocates its own data structures. Signed-off-by: Catalin Marinas Reviewed-by: Pekka Enberg --- include/linux/slab.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 24c5602bee99..48803064cedf 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -62,6 +62,8 @@ # define SLAB_DEBUG_OBJECTS 0x00000000UL #endif +#define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ + /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ -- cgit v1.2.3 From 2e1483c995bbd0fa6cbd055ad76088a520799ba4 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 11 Jun 2009 13:24:13 +0100 Subject: kmemleak: Remove some of the kmemleak false positives There are allocations for which the main pointer cannot be found but they are not memory leaks. This patch fixes some of them. For more information on false positives, see Documentation/kmemleak.txt. Signed-off-by: Catalin Marinas --- include/linux/percpu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1581ff235c7e..26fd9d12f050 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -86,7 +86,12 @@ struct percpu_data { void *ptrs[1]; }; +/* pointer disguising messes up the kmemleak objects tracking */ +#ifndef CONFIG_DEBUG_KMEMLEAK #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) +#else +#define __percpu_disguise(pdata) (struct percpu_data *)(pdata) +#endif #define per_cpu_ptr(ptr, cpu) \ ({ \ -- cgit v1.2.3 From 38c7fed2f5ffee17e1fa3e0f78b0e1bf43d52d13 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 25 May 2009 15:10:58 +0300 Subject: x86: remove some alloc_bootmem_cpumask_var calling Now that we set up the slab allocator earlier, we can get rid of some alloc_bootmem_cpumask_var() calls in boot code. Cc: Ingo Molnar Cc: Johannes Weiner Cc: Linus Torvalds Signed-off-by: Yinghai Lu Signed-off-by: Pekka Enberg --- include/linux/irq.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index eedbb8e5e0cc..1e50c34f0062 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -430,23 +430,19 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); * Returns true if successful (or not required). */ static inline bool alloc_desc_masks(struct irq_desc *desc, int node, - bool boot) + bool boot) { -#ifdef CONFIG_CPUMASK_OFFSTACK - if (boot) { - alloc_bootmem_cpumask_var(&desc->affinity); + gfp_t gfp = GFP_ATOMIC; -#ifdef CONFIG_GENERIC_PENDING_IRQ - alloc_bootmem_cpumask_var(&desc->pending_mask); -#endif - return true; - } + if (boot) + gfp = GFP_NOWAIT; - if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) +#ifdef CONFIG_CPUMASK_OFFSTACK + if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) return false; #ifdef CONFIG_GENERIC_PENDING_IRQ - if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { + if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { free_cpumask_var(desc->affinity); return false; } -- cgit v1.2.3 From 90586523eb4b349806887c62ee70685a49415124 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:20 -0400 Subject: fsnotify: unified filesystem notification backend fsnotify is a backend for filesystem notification. fsnotify does not provide any userspace interface but does provide the basis needed for other notification schemes such as dnotify. fsnotify can be extended to be the backend for inotify or the upcoming fanotify. fsnotify provides a mechanism for "groups" to register for some set of filesystem events and to then deliver those events to those groups for processing. fsnotify has a number of benefits, the first being actually shrinking the size of an inode. Before fsnotify to support both dnotify and inotify an inode had unsigned long i_dnotify_mask; /* Directory notify events */ struct dnotify_struct *i_dnotify; /* for directory notifications */ struct list_head inotify_watches; /* watches on this inode */ struct mutex inotify_mutex; /* protects the watches list But with fsnotify this same functionallity (and more) is done with just __u32 i_fsnotify_mask; /* all events for this inode */ struct hlist_head i_fsnotify_mark_entries; /* marks on this inode */ That's right, inotify, dnotify, and fanotify all in 64 bits. We used that much space just in inotify_watches alone, before this patch set. fsnotify object lifetime and locking is MUCH better than what we have today. inotify locking is incredibly complex. See 8f7b0ba1c8539 as an example of what's been busted since inception. inotify needs to know internal semantics of superblock destruction and unmounting to function. The inode pinning and vfs contortions are horrible. no fsnotify implementers do allocation under locks. This means things like f04b30de3 which (due to an overabundance of caution) changes GFP_KERNEL to GFP_NOFS can be reverted. There are no longer any allocation rules when using or implementing your own fsnotify listener. fsnotify paves the way for fanotify. In brief fanotify is a notification mechanism that delivers the lisener both an 'event' and an open file descriptor to the object in question. This means that fanotify is pathname agnostic. Some on lkml may not care for the original companies or users that pushed for TALPA, but fanotify was designed with flexibility and input for other users in mind. The readahead group expressed interest in fanotify as it could be used to profile disk access on boot without breaking the audit system. The desktop search groups have also expressed interest in fanotify as it solves a number of the race conditions and problems present with managing inotify when more than a limited number of specific files are of interest. fanotify can provide for a userspace access control system which makes it a clean interface for AV vendors to hook without trying to do binary patching on the syscall table, LSM, and everywhere else they do their things today. With this patch series fanotify can be implemented in less than 1200 lines of easy to review code. Almost all of which is the socket based user interface. This patch series builds fsnotify to the point that it can implement dnotify and inotify_user. Patches exist and will be sent soon after acceptance to finish the in kernel inotify conversion (audit) and implement fanotify. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- include/linux/fsnotify.h | 115 +++++++++++++++++-------- include/linux/fsnotify_backend.h | 177 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 257 insertions(+), 35 deletions(-) create mode 100644 include/linux/fsnotify_backend.h (limited to 'include') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 00fbd5b245c9..6c9ebefdac8e 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -13,6 +13,7 @@ #include #include +#include #include /* @@ -34,6 +35,16 @@ static inline void fsnotify_d_move(struct dentry *entry) inotify_d_move(entry); } +/* + * fsnotify_link_count - inode's link count changed + */ +static inline void fsnotify_link_count(struct inode *inode) +{ + inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); + + fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE); +} + /* * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir */ @@ -43,28 +54,47 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, { struct inode *source = moved->d_inode; u32 cookie = inotify_get_cookie(); + __u32 old_dir_mask = 0; + __u32 new_dir_mask = 0; - if (old_dir == new_dir) + if (old_dir == new_dir) { inode_dir_notify(old_dir, DN_RENAME); - else { + old_dir_mask = FS_DN_RENAME; + } else { inode_dir_notify(old_dir, DN_DELETE); + old_dir_mask = FS_DELETE; inode_dir_notify(new_dir, DN_CREATE); + new_dir_mask = FS_CREATE; } - if (isdir) + if (isdir) { isdir = IN_ISDIR; + old_dir_mask |= FS_IN_ISDIR; + new_dir_mask |= FS_IN_ISDIR; + } + + old_dir_mask |= FS_MOVED_FROM; + new_dir_mask |= FS_MOVED_TO; + inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name, source); inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name, source); + fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE); + fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE); + if (target) { inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(target); + + /* this is really a link_count change not a removal */ + fsnotify_link_count(target); } if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); + fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE); } audit_inode_child(new_name, moved, new_dir); } @@ -74,10 +104,12 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, */ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) { + __u32 mask = FS_DELETE; + if (isdir) - isdir = IN_ISDIR; + mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_DELETE); - inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name); + inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); } /* @@ -87,14 +119,8 @@ static inline void fsnotify_inoderemove(struct inode *inode) { inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(inode); -} -/* - * fsnotify_link_count - inode's link count changed - */ -static inline void fsnotify_link_count(struct inode *inode) -{ - inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); + fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE); } /* @@ -106,6 +132,8 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name, dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); + + fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE); } /* @@ -120,6 +148,8 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct inode); fsnotify_link_count(inode); audit_inode_child(new_dentry->d_name.name, new_dentry, dir); + + fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE); } /* @@ -127,10 +157,14 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct */ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { + __u32 mask = (FS_CREATE | FS_IN_ISDIR); + struct inode *d_inode = dentry->d_inode; + inode_dir_notify(inode, DN_CREATE); - inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, - dentry->d_name.name, dentry->d_inode); + inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); + + fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE); } /* @@ -139,14 +173,16 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) static inline void fsnotify_access(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_ACCESS; + __u32 mask = FS_ACCESS; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_ACCESS); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -155,14 +191,16 @@ static inline void fsnotify_access(struct dentry *dentry) static inline void fsnotify_modify(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_MODIFY; + __u32 mask = FS_MODIFY; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_MODIFY); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -171,13 +209,15 @@ static inline void fsnotify_modify(struct dentry *dentry) static inline void fsnotify_open(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_OPEN; + __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -189,13 +229,15 @@ static inline void fsnotify_close(struct file *file) struct inode *inode = dentry->d_inode; const char *name = dentry->d_name.name; fmode_t mode = file->f_mode; - u32 mask = (mode & FMODE_WRITE) ? IN_CLOSE_WRITE : IN_CLOSE_NOWRITE; + __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE); } /* @@ -204,13 +246,15 @@ static inline void fsnotify_close(struct file *file) static inline void fsnotify_xattr(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_ATTRIB; + __u32 mask = FS_ATTRIB; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -221,34 +265,34 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { struct inode *inode = dentry->d_inode; int dn_mask = 0; - u32 in_mask = 0; + __u32 in_mask = 0; if (ia_valid & ATTR_UID) { - in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } if (ia_valid & ATTR_GID) { - in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } if (ia_valid & ATTR_SIZE) { - in_mask |= IN_MODIFY; + in_mask |= FS_MODIFY; dn_mask |= DN_MODIFY; } /* both times implies a utime(s) call */ if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) { - in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } else if (ia_valid & ATTR_ATIME) { - in_mask |= IN_ACCESS; + in_mask |= FS_ACCESS; dn_mask |= DN_ACCESS; } else if (ia_valid & ATTR_MTIME) { - in_mask |= IN_MODIFY; + in_mask |= FS_MODIFY; dn_mask |= DN_MODIFY; } if (ia_valid & ATTR_MODE) { - in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } @@ -256,14 +300,15 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) dnotify_parent(dentry, dn_mask); if (in_mask) { if (S_ISDIR(inode->i_mode)) - in_mask |= IN_ISDIR; + in_mask |= FS_IN_ISDIR; inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL); inotify_dentry_parent_queue_event(dentry, in_mask, 0, dentry->d_name.name); + fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE); } } -#ifdef CONFIG_INOTIFY /* inotify helpers */ +#if defined(CONFIG_INOTIFY) || defined(CONFIG_FSNOTIFY) /* notify helpers */ /* * fsnotify_oldname_init - save off the old filename before we change it @@ -281,7 +326,7 @@ static inline void fsnotify_oldname_free(const char *old_name) kfree(old_name); } -#else /* CONFIG_INOTIFY */ +#else /* CONFIG_INOTIFY || CONFIG_FSNOTIFY */ static inline const char *fsnotify_oldname_init(const char *name) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h new file mode 100644 index 000000000000..1a55718b38aa --- /dev/null +++ b/include/linux/fsnotify_backend.h @@ -0,0 +1,177 @@ +/* + * Filesystem access notification for Linux + * + * Copyright (C) 2008 Red Hat, Inc., Eric Paris + */ + +#ifndef __LINUX_FSNOTIFY_BACKEND_H +#define __LINUX_FSNOTIFY_BACKEND_H + +#ifdef __KERNEL__ + +#include /* struct inode */ +#include +#include /* struct path */ +#include +#include + +#include + +/* + * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily + * convert between them. dnotify only needs conversion at watch creation + * so no perf loss there. fanotify isn't defined yet, so it can use the + * wholes if it needs more events. + */ +#define FS_ACCESS 0x00000001 /* File was accessed */ +#define FS_MODIFY 0x00000002 /* File was modified */ +#define FS_ATTRIB 0x00000004 /* Metadata changed */ +#define FS_CLOSE_WRITE 0x00000008 /* Writtable file was closed */ +#define FS_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ +#define FS_OPEN 0x00000020 /* File was opened */ +#define FS_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FS_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FS_CREATE 0x00000100 /* Subfile was created */ +#define FS_DELETE 0x00000200 /* Subfile was deleted */ +#define FS_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FS_MOVE_SELF 0x00000800 /* Self was moved */ + +#define FS_UNMOUNT 0x00002000 /* inode on umount fs */ +#define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FS_IN_IGNORED 0x00008000 /* last inotify event here */ + +#define FS_IN_ISDIR 0x40000000 /* event occurred against dir */ +#define FS_IN_ONESHOT 0x80000000 /* only send event once */ + +#define FS_DN_RENAME 0x10000000 /* file renamed */ +#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ + +struct fsnotify_group; +struct fsnotify_event; + +/* + * Each group much define these ops. The fsnotify infrastructure will call + * these operations for each relevant group. + * + * handle_event - main call for a group to handle an fs event + * free_group_priv - called when a group refcnt hits 0 to clean up the private union + */ +struct fsnotify_ops { + int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event); + void (*free_group_priv)(struct fsnotify_group *group); +}; + +/* + * A group is a "thing" that wants to receive notification about filesystem + * events. The mask holds the subset of event types this group cares about. + * refcnt on a group is up to the implementor and at any moment if it goes 0 + * everything will be cleaned up. + */ +struct fsnotify_group { + /* + * global list of all groups receiving events from fsnotify. + * anchored by fsnotify_groups and protected by either fsnotify_grp_mutex + * or fsnotify_grp_srcu depending on write vs read. + */ + struct list_head group_list; + + /* + * Defines all of the event types in which this group is interested. + * This mask is a bitwise OR of the FS_* events from above. Each time + * this mask changes for a group (if it changes) the correct functions + * must be called to update the global structures which indicate global + * interest in event types. + */ + __u32 mask; + + /* + * How the refcnt is used is up to each group. When the refcnt hits 0 + * fsnotify will clean up all of the resources associated with this group. + * As an example, the dnotify group will always have a refcnt=1 and that + * will never change. Inotify, on the other hand, has a group per + * inotify_init() and the refcnt will hit 0 only when that fd has been + * closed. + */ + atomic_t refcnt; /* things with interest in this group */ + unsigned int group_num; /* simply prevents accidental group collision */ + + const struct fsnotify_ops *ops; /* how this group handles things */ + + /* prevents double list_del of group_list. protected by global fsnotify_gr_mutex */ + bool on_group_list; + + /* groups can define private fields here or use the void *private */ + union { + void *private; + }; +}; + +/* + * all of the information about the original object we want to now send to + * a group. If you want to carry more info from the accessing task to the + * listener this structure is where you need to be adding fields. + */ +struct fsnotify_event { + spinlock_t lock; /* protection for the associated event_holder and private_list */ + /* to_tell may ONLY be dereferenced during handle_event(). */ + struct inode *to_tell; /* either the inode the event happened to or its parent */ + /* + * depending on the event type we should have either a path or inode + * We hold a reference on path, but NOT on inode. Since we have the ref on + * the path, it may be dereferenced at any point during this object's + * lifetime. That reference is dropped when this object's refcnt hits + * 0. If this event contains an inode instead of a path, the inode may + * ONLY be used during handle_event(). + */ + union { + struct path path; + struct inode *inode; + }; +/* when calling fsnotify tell it if the data is a path or inode */ +#define FSNOTIFY_EVENT_NONE 0 +#define FSNOTIFY_EVENT_PATH 1 +#define FSNOTIFY_EVENT_INODE 2 +#define FSNOTIFY_EVENT_FILE 3 + int data_type; /* which of the above union we have */ + atomic_t refcnt; /* how many groups still are using/need to send this event */ + __u32 mask; /* the type of access, bitwise OR for FS_* event types */ +}; + +#ifdef CONFIG_FSNOTIFY + +/* called from the vfs helpers */ + +/* main fsnotify call to send events */ +extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is); + + +/* called from fsnotify listeners, such as fanotify or dnotify */ + +/* must call when a group changes its ->mask */ +extern void fsnotify_recalc_global_mask(void); +/* get a reference to an existing or create a new group */ +extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, + __u32 mask, + const struct fsnotify_ops *ops); +/* drop reference on a group from fsnotify_obtain_group */ +extern void fsnotify_put_group(struct fsnotify_group *group); + +/* take a reference to an event */ +extern void fsnotify_get_event(struct fsnotify_event *event); +extern void fsnotify_put_event(struct fsnotify_event *event); +/* find private data previously attached to an event */ +extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group, + struct fsnotify_event *event); + +/* put here because inotify does some weird stuff when destroying watches */ +extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, + void *data, int data_is); +#else + +static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) +{} +#endif /* CONFIG_FSNOTIFY */ + +#endif /* __KERNEL __ */ + +#endif /* __LINUX_FSNOTIFY_BACKEND_H */ -- cgit v1.2.3 From 3be25f49b9d6a97eae9bcb96d3292072b7658bd8 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:26 -0400 Subject: fsnotify: add marks to inodes so groups can interpret how to handle those inodes This patch creates a way for fsnotify groups to attach marks to inodes. These marks have little meaning to the generic fsnotify infrastructure and thus their meaning should be interpreted by the group that attached them to the inode's list. dnotify and inotify will make use of these markings to indicate which inodes are of interest to their respective groups. But this implementation has the useful property that in the future other listeners could actually use the marks for the exact opposite reason, aka to indicate which inodes it had NO interest in. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- include/linux/fs.h | 5 ++++ include/linux/fsnotify.h | 9 ++++++ include/linux/fsnotify_backend.h | 65 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 78 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 83d6b4397245..275b0860044c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -755,6 +755,11 @@ struct inode { __u32 i_generation; +#ifdef CONFIG_FSNOTIFY + __u32 i_fsnotify_mask; /* all events this inode cares about */ + struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */ +#endif + #ifdef CONFIG_DNOTIFY unsigned long i_dnotify_mask; /* Directory notify events */ struct dnotify_struct *i_dnotify; /* for directory notifications */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 6c9ebefdac8e..3856eb6e5973 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -99,6 +99,14 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, audit_inode_child(new_name, moved, new_dir); } +/* + * fsnotify_inode_delete - and inode is being evicted from cache, clean up is needed + */ +static inline void fsnotify_inode_delete(struct inode *inode) +{ + __fsnotify_inode_delete(inode); +} + /* * fsnotify_nameremove - a filename was removed from a directory */ @@ -121,6 +129,7 @@ static inline void fsnotify_inoderemove(struct inode *inode) inotify_inode_is_dead(inode); fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE); + __fsnotify_inode_delete(inode); } /* diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1a55718b38aa..cad5c4d75c1d 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -48,17 +48,25 @@ struct fsnotify_group; struct fsnotify_event; +struct fsnotify_mark_entry; /* * Each group much define these ops. The fsnotify infrastructure will call * these operations for each relevant group. * + * should_send_event - given a group, inode, and mask this function determines + * if the group is interested in this event. * handle_event - main call for a group to handle an fs event * free_group_priv - called when a group refcnt hits 0 to clean up the private union + * freeing-mark - this means that a mark has been flagged to die when everything + * finishes using it. The function is supplied with what must be a + * valid group and inode to use to clean up. */ struct fsnotify_ops { + bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, __u32 mask); int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event); void (*free_group_priv)(struct fsnotify_group *group); + void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group); }; /* @@ -97,7 +105,14 @@ struct fsnotify_group { const struct fsnotify_ops *ops; /* how this group handles things */ - /* prevents double list_del of group_list. protected by global fsnotify_gr_mutex */ + /* stores all fastapth entries assoc with this group so they can be cleaned on unregister */ + spinlock_t mark_lock; /* protect mark_entries list */ + atomic_t num_marks; /* 1 for each mark entry and 1 for not being + * past the point of no return when freeing + * a group */ + struct list_head mark_entries; /* all inode mark entries for this group */ + + /* prevents double list_del of group_list. protected by global fsnotify_grp_mutex */ bool on_group_list; /* groups can define private fields here or use the void *private */ @@ -137,12 +152,38 @@ struct fsnotify_event { __u32 mask; /* the type of access, bitwise OR for FS_* event types */ }; +/* + * a mark is simply an entry attached to an in core inode which allows an + * fsnotify listener to indicate they are either no longer interested in events + * of a type matching mask or only interested in those events. + * + * these are flushed when an inode is evicted from core and may be flushed + * when the inode is modified (as seen by fsnotify_access). Some fsnotify users + * (such as dnotify) will flush these when the open fd is closed and not at + * inode eviction or modification. + */ +struct fsnotify_mark_entry { + __u32 mask; /* mask this mark entry is for */ + /* we hold ref for each i_list and g_list. also one ref for each 'thing' + * in kernel that found and may be using this mark. */ + atomic_t refcnt; /* active things looking at this mark */ + struct inode *inode; /* inode this entry is associated with */ + struct fsnotify_group *group; /* group this mark entry is for */ + struct hlist_node i_list; /* list of mark_entries by inode->i_fsnotify_mark_entries */ + struct list_head g_list; /* list of mark_entries by group->i_fsnotify_mark_entries */ + spinlock_t lock; /* protect group, inode, and killme */ + struct list_head free_i_list; /* tmp list used when freeing this mark */ + struct list_head free_g_list; /* tmp list used when freeing this mark */ + void (*free_mark)(struct fsnotify_mark_entry *entry); /* called on final put+free */ +}; + #ifdef CONFIG_FSNOTIFY /* called from the vfs helpers */ /* main fsnotify call to send events */ extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is); +extern void __fsnotify_inode_delete(struct inode *inode); /* called from fsnotify listeners, such as fanotify or dnotify */ @@ -153,6 +194,8 @@ extern void fsnotify_recalc_global_mask(void); extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask, const struct fsnotify_ops *ops); +/* run all marks associated with this group and update group->mask */ +extern void fsnotify_recalc_group_mask(struct fsnotify_group *group); /* drop reference on a group from fsnotify_obtain_group */ extern void fsnotify_put_group(struct fsnotify_group *group); @@ -163,6 +206,22 @@ extern void fsnotify_put_event(struct fsnotify_event *event); extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event); +/* functions used to manipulate the marks attached to inodes */ + +/* run all marks associated with an inode and update inode->i_fsnotify_mask */ +extern void fsnotify_recalc_inode_mask(struct inode *inode); +extern void fsnotify_init_mark(struct fsnotify_mark_entry *entry, void (*free_mark)(struct fsnotify_mark_entry *entry)); +/* find (and take a reference) to a mark associated with group and inode */ +extern struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, struct inode *inode); +/* attach the mark to both the group and the inode */ +extern int fsnotify_add_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group, struct inode *inode); +/* given a mark, flag it to be freed when all references are dropped */ +extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry); +/* run all the marks in a group, and flag them to be freed */ +extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); +extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry); +extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); + /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, int data_is); @@ -170,6 +229,10 @@ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) {} + +static inline void __fsnotify_inode_delete(struct inode *inode) +{} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ -- cgit v1.2.3 From c28f7e56e9d95fb531dc3be8df2e7f52bee76d21 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:29 -0400 Subject: fsnotify: parent event notification inotify and dnotify both use a similar parent notification mechanism. We add a generic parent notification mechanism to fsnotify for both of these to use. This new machanism also adds the dentry flag optimization which exists for inotify to dnotify. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- include/linux/dcache.h | 4 ++- include/linux/fsnotify.h | 34 +++++++++++++++------ include/linux/fsnotify_backend.h | 64 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 15156364d196..97978004338d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -180,10 +180,12 @@ d_iput: no no no yes #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_UNHASHED 0x0010 -#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */ +#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ #define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ +#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ + extern spinlock_t dcache_lock; extern seqlock_t rename_lock; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 3856eb6e5973..6a662ed0bc8a 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -23,15 +23,31 @@ static inline void fsnotify_d_instantiate(struct dentry *entry, struct inode *inode) { + __fsnotify_d_instantiate(entry, inode); + inotify_d_instantiate(entry, inode); } +/* Notify this dentry's parent about a child's events. */ +static inline void fsnotify_parent(struct dentry *dentry, __u32 mask) +{ + __fsnotify_parent(dentry, mask); + + inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); +} + /* * fsnotify_d_move - entry has been moved * Called with dcache_lock and entry->d_lock held. */ static inline void fsnotify_d_move(struct dentry *entry) { + /* + * On move we need to update entry->d_flags to indicate if the new parent + * cares about events from this entry. + */ + __fsnotify_update_dcache_flags(entry); + inotify_d_move(entry); } @@ -117,7 +133,8 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) if (isdir) mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_DELETE); - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); + + fsnotify_parent(dentry, mask); } /* @@ -188,9 +205,9 @@ static inline void fsnotify_access(struct dentry *dentry) mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_ACCESS); - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + fsnotify_parent(dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } @@ -206,9 +223,9 @@ static inline void fsnotify_modify(struct dentry *dentry) mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_MODIFY); - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + fsnotify_parent(dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } @@ -223,9 +240,9 @@ static inline void fsnotify_open(struct dentry *dentry) if (S_ISDIR(inode->i_mode)) mask |= FS_IN_ISDIR; - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + fsnotify_parent(dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } @@ -236,16 +253,15 @@ static inline void fsnotify_close(struct file *file) { struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - const char *name = dentry->d_name.name; fmode_t mode = file->f_mode; __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; if (S_ISDIR(inode->i_mode)) mask |= FS_IN_ISDIR; - inotify_dentry_parent_queue_event(dentry, mask, 0, name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + fsnotify_parent(dentry, mask); fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE); } @@ -260,9 +276,9 @@ static inline void fsnotify_xattr(struct dentry *dentry) if (S_ISDIR(inode->i_mode)) mask |= FS_IN_ISDIR; - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + fsnotify_parent(dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } @@ -311,8 +327,8 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) if (S_ISDIR(inode->i_mode)) in_mask |= FS_IN_ISDIR; inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL); - inotify_dentry_parent_queue_event(dentry, in_mask, 0, - dentry->d_name.name); + + fsnotify_parent(dentry, in_mask); fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE); } } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index cad5c4d75c1d..13d2dd570049 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -46,6 +46,17 @@ #define FS_DN_RENAME 0x10000000 /* file renamed */ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ +/* This inode cares about things that happen to its children. Always set for + * dnotify and inotify. */ +#define FS_EVENT_ON_CHILD 0x08000000 + +/* This is a list of all events that may get sent to a parernt based on fs event + * happening to inodes inside that directory */ +#define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ + FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ + FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ + FS_DELETE) + struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark_entry; @@ -183,8 +194,52 @@ struct fsnotify_mark_entry { /* main fsnotify call to send events */ extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is); +extern void __fsnotify_parent(struct dentry *dentry, __u32 mask); extern void __fsnotify_inode_delete(struct inode *inode); +static inline int fsnotify_inode_watches_children(struct inode *inode) +{ + /* FS_EVENT_ON_CHILD is set if the inode may care */ + if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD)) + return 0; + /* this inode might care about child events, does it care about the + * specific set of events that can happen on a child? */ + return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD; +} + +/* + * Update the dentry with a flag indicating the interest of its parent to receive + * filesystem events when those events happens to this dentry->d_inode. + */ +static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) +{ + struct dentry *parent; + + assert_spin_locked(&dcache_lock); + assert_spin_locked(&dentry->d_lock); + + parent = dentry->d_parent; + if (fsnotify_inode_watches_children(parent->d_inode)) + dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; + else + dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; +} + +/* + * fsnotify_d_instantiate - instantiate a dentry for inode + * Called with dcache_lock held. + */ +static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) +{ + if (!inode) + return; + + assert_spin_locked(&dcache_lock); + + spin_lock(&dentry->d_lock); + __fsnotify_update_dcache_flags(dentry); + spin_unlock(&dentry->d_lock); +} /* called from fsnotify listeners, such as fanotify or dnotify */ @@ -230,9 +285,18 @@ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) {} +static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask) +{} + static inline void __fsnotify_inode_delete(struct inode *inode) {} +static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) +{} + +static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) +{} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ -- cgit v1.2.3 From 3c5119c05d624f95f4967d16b38c9624b816bdb9 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:33 -0400 Subject: dnotify: reimplement dnotify using fsnotify Reimplement dnotify using fsnotify. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- include/linux/dnotify.h | 29 +++++------------ include/linux/fs.h | 5 --- include/linux/fsnotify.h | 68 +++++++++++++--------------------------- include/linux/fsnotify_backend.h | 3 ++ 4 files changed, 32 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index 102a902b4396..ecc06286226d 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -10,7 +10,7 @@ struct dnotify_struct { struct dnotify_struct * dn_next; - unsigned long dn_mask; + __u32 dn_mask; int dn_fd; struct file * dn_filp; fl_owner_t dn_owner; @@ -21,23 +21,18 @@ struct dnotify_struct { #ifdef CONFIG_DNOTIFY -extern void __inode_dir_notify(struct inode *, unsigned long); +#define DNOTIFY_ALL_EVENTS (FS_DELETE | FS_DELETE_CHILD |\ + FS_MODIFY | FS_MODIFY_CHILD |\ + FS_ACCESS | FS_ACCESS_CHILD |\ + FS_ATTRIB | FS_ATTRIB_CHILD |\ + FS_CREATE | FS_DN_RENAME |\ + FS_MOVED_FROM | FS_MOVED_TO) + extern void dnotify_flush(struct file *, fl_owner_t); extern int fcntl_dirnotify(int, struct file *, unsigned long); -extern void dnotify_parent(struct dentry *, unsigned long); - -static inline void inode_dir_notify(struct inode *inode, unsigned long event) -{ - if (inode->i_dnotify_mask & (event)) - __inode_dir_notify(inode, event); -} #else -static inline void __inode_dir_notify(struct inode *inode, unsigned long event) -{ -} - static inline void dnotify_flush(struct file *filp, fl_owner_t id) { } @@ -47,14 +42,6 @@ static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) return -EINVAL; } -static inline void dnotify_parent(struct dentry *dentry, unsigned long event) -{ -} - -static inline void inode_dir_notify(struct inode *inode, unsigned long event) -{ -} - #endif /* CONFIG_DNOTIFY */ #endif /* __KERNEL __ */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 275b0860044c..323b5ce474c1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -760,11 +760,6 @@ struct inode { struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */ #endif -#ifdef CONFIG_DNOTIFY - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -#endif - #ifdef CONFIG_INOTIFY struct list_head inotify_watches; /* watches on this inode */ struct mutex inotify_mutex; /* protects the watches list */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 6a662ed0bc8a..db12d9de3526 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -74,13 +74,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, __u32 new_dir_mask = 0; if (old_dir == new_dir) { - inode_dir_notify(old_dir, DN_RENAME); old_dir_mask = FS_DN_RENAME; - } else { - inode_dir_notify(old_dir, DN_DELETE); - old_dir_mask = FS_DELETE; - inode_dir_notify(new_dir, DN_CREATE); - new_dir_mask = FS_CREATE; } if (isdir) { @@ -132,7 +126,6 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) if (isdir) mask |= FS_IN_ISDIR; - dnotify_parent(dentry, DN_DELETE); fsnotify_parent(dentry, mask); } @@ -154,7 +147,6 @@ static inline void fsnotify_inoderemove(struct inode *inode) */ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { - inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name, dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); @@ -169,7 +161,6 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) */ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry) { - inode_dir_notify(dir, DN_CREATE); inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name, inode); fsnotify_link_count(inode); @@ -186,7 +177,6 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) __u32 mask = (FS_CREATE | FS_IN_ISDIR); struct inode *d_inode = dentry->d_inode; - inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); @@ -204,7 +194,6 @@ static inline void fsnotify_access(struct dentry *dentry) if (S_ISDIR(inode->i_mode)) mask |= FS_IN_ISDIR; - dnotify_parent(dentry, DN_ACCESS); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); @@ -222,7 +211,6 @@ static inline void fsnotify_modify(struct dentry *dentry) if (S_ISDIR(inode->i_mode)) mask |= FS_IN_ISDIR; - dnotify_parent(dentry, DN_MODIFY); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); @@ -289,47 +277,33 @@ static inline void fsnotify_xattr(struct dentry *dentry) static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { struct inode *inode = dentry->d_inode; - int dn_mask = 0; - __u32 in_mask = 0; + __u32 mask = 0; + + if (ia_valid & ATTR_UID) + mask |= FS_ATTRIB; + if (ia_valid & ATTR_GID) + mask |= FS_ATTRIB; + if (ia_valid & ATTR_SIZE) + mask |= FS_MODIFY; - if (ia_valid & ATTR_UID) { - in_mask |= FS_ATTRIB; - dn_mask |= DN_ATTRIB; - } - if (ia_valid & ATTR_GID) { - in_mask |= FS_ATTRIB; - dn_mask |= DN_ATTRIB; - } - if (ia_valid & ATTR_SIZE) { - in_mask |= FS_MODIFY; - dn_mask |= DN_MODIFY; - } /* both times implies a utime(s) call */ if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) - { - in_mask |= FS_ATTRIB; - dn_mask |= DN_ATTRIB; - } else if (ia_valid & ATTR_ATIME) { - in_mask |= FS_ACCESS; - dn_mask |= DN_ACCESS; - } else if (ia_valid & ATTR_MTIME) { - in_mask |= FS_MODIFY; - dn_mask |= DN_MODIFY; - } - if (ia_valid & ATTR_MODE) { - in_mask |= FS_ATTRIB; - dn_mask |= DN_ATTRIB; - } + mask |= FS_ATTRIB; + else if (ia_valid & ATTR_ATIME) + mask |= FS_ACCESS; + else if (ia_valid & ATTR_MTIME) + mask |= FS_MODIFY; + + if (ia_valid & ATTR_MODE) + mask |= FS_ATTRIB; - if (dn_mask) - dnotify_parent(dentry, dn_mask); - if (in_mask) { + if (mask) { if (S_ISDIR(inode->i_mode)) - in_mask |= FS_IN_ISDIR; - inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL); + mask |= FS_IN_ISDIR; + inotify_inode_queue_event(inode, mask, 0, NULL, NULL); - fsnotify_parent(dentry, in_mask); - fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify_parent(dentry, mask); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 13d2dd570049..9ea800e840f1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -57,6 +57,9 @@ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ FS_DELETE) +/* listeners that hard code group numbers near the top */ +#define DNOTIFY_GROUP_NUM UINT_MAX + struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark_entry; -- cgit v1.2.3 From a2d8bc6cb4a3024661baf877242f123787d0c054 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:37 -0400 Subject: fsnotify: generic notification queue and waitq inotify needs to do asyc notification in which event information is stored on a queue until the listener is ready to receive it. This patch implements a generic notification queue for inotify (and later fanotify) to store events to be sent at a later time. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- include/linux/fsnotify_backend.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 9ea800e840f1..15f8f82a5c57 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -119,6 +119,13 @@ struct fsnotify_group { const struct fsnotify_ops *ops; /* how this group handles things */ + /* needed to send notification to userspace */ + struct mutex notification_mutex; /* protect the notification_list */ + struct list_head notification_list; /* list of event_holder this group needs to send to userspace */ + wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ + unsigned int q_len; /* events on the queue */ + unsigned int max_events; /* maximum events allowed on the list */ + /* stores all fastapth entries assoc with this group so they can be cleaned on unregister */ spinlock_t mark_lock; /* protect mark_entries list */ atomic_t num_marks; /* 1 for each mark entry and 1 for not being @@ -135,12 +142,33 @@ struct fsnotify_group { }; }; +/* + * A single event can be queued in multiple group->notification_lists. + * + * each group->notification_list will point to an event_holder which in turns points + * to the actual event that needs to be sent to userspace. + * + * Seemed cheaper to create a refcnt'd event and a small holder for every group + * than create a different event for every group + * + */ +struct fsnotify_event_holder { + struct fsnotify_event *event; + struct list_head event_list; +}; + /* * all of the information about the original object we want to now send to * a group. If you want to carry more info from the accessing task to the * listener this structure is where you need to be adding fields. */ struct fsnotify_event { + /* + * If we create an event we are also likely going to need a holder + * to link to a group. So embed one holder in the event. Means only + * one allocation for the common case where we only have one group + */ + struct fsnotify_event_holder holder; spinlock_t lock; /* protection for the associated event_holder and private_list */ /* to_tell may ONLY be dereferenced during handle_event(). */ struct inode *to_tell; /* either the inode the event happened to or its parent */ @@ -264,6 +292,15 @@ extern void fsnotify_put_event(struct fsnotify_event *event); extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event); +/* attach the event to the group notification queue */ +extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event); +/* true if the group notification queue is empty */ +extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); +/* return, but do not dequeue the first event on the notification queue */ +extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group); +/* reutnr AND dequeue the first event on the notification queue */ +extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group); + /* functions used to manipulate the marks attached to inodes */ /* run all marks associated with an inode and update inode->i_fsnotify_mask */ -- cgit v1.2.3 From 62ffe5dfba056f7ba81d710fee9f28c58a42fdd6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:43 -0400 Subject: fsnotify: include pathnames with entries when possible When inotify wants to send events to a directory about a child it includes the name of the original file. This patch collects that filename and makes it available for notification. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- include/linux/fsnotify.h | 28 ++++++++++++++-------------- include/linux/fsnotify_backend.h | 11 ++++++++--- 2 files changed, 22 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index db12d9de3526..180740e9ec82 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -58,7 +58,7 @@ static inline void fsnotify_link_count(struct inode *inode) { inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); - fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL); } /* @@ -91,8 +91,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name, source); - fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE); - fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE); + fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name); + fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name); if (target) { inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL); @@ -104,7 +104,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); - fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE); + fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL); } audit_inode_child(new_name, moved, new_dir); } @@ -138,7 +138,7 @@ static inline void fsnotify_inoderemove(struct inode *inode) inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(inode); - fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL); __fsnotify_inode_delete(inode); } @@ -151,7 +151,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); - fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name); } /* @@ -166,7 +166,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct fsnotify_link_count(inode); audit_inode_child(new_dentry->d_name.name, new_dentry, dir); - fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE); + fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name); } /* @@ -180,7 +180,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); - fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name); } /* @@ -197,7 +197,7 @@ static inline void fsnotify_access(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); } /* @@ -214,7 +214,7 @@ static inline void fsnotify_modify(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); } /* @@ -231,7 +231,7 @@ static inline void fsnotify_open(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); } /* @@ -250,7 +250,7 @@ static inline void fsnotify_close(struct file *file) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE); + fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL); } /* @@ -267,7 +267,7 @@ static inline void fsnotify_xattr(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); } /* @@ -303,7 +303,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); } } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 15f8f82a5c57..52692f405890 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -192,6 +192,9 @@ struct fsnotify_event { int data_type; /* which of the above union we have */ atomic_t refcnt; /* how many groups still are using/need to send this event */ __u32 mask; /* the type of access, bitwise OR for FS_* event types */ + + char *file_name; + size_t name_len; }; /* @@ -224,7 +227,7 @@ struct fsnotify_mark_entry { /* called from the vfs helpers */ /* main fsnotify call to send events */ -extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is); +extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *name); extern void __fsnotify_parent(struct dentry *dentry, __u32 mask); extern void __fsnotify_inode_delete(struct inode *inode); @@ -319,10 +322,12 @@ extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, - void *data, int data_is); + void *data, int data_is, const char *name); + #else -static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) +static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + const char *name); {} static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask) -- cgit v1.2.3 From 47882c6f51e8ef41fbbe2bbb746a1ea3228dd7ca Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:47 -0400 Subject: fsnotify: add correlations between events As part of the standard inotify events it includes a correlation cookie between two dentry move operations. This patch includes the same behaviour in fsnotify events. It is needed so that inotify userspace can be implemented on top of fsnotify. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- include/linux/fsnotify.h | 35 ++++++++++++++++++----------------- include/linux/fsnotify_backend.h | 15 ++++++++++++--- 2 files changed, 30 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 180740e9ec82..c25b39ddd62a 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -58,7 +58,7 @@ static inline void fsnotify_link_count(struct inode *inode) { inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); - fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -69,7 +69,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, int isdir, struct inode *target, struct dentry *moved) { struct inode *source = moved->d_inode; - u32 cookie = inotify_get_cookie(); + u32 in_cookie = inotify_get_cookie(); + u32 fs_cookie = fsnotify_get_cookie(); __u32 old_dir_mask = 0; __u32 new_dir_mask = 0; @@ -86,13 +87,13 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, old_dir_mask |= FS_MOVED_FROM; new_dir_mask |= FS_MOVED_TO; - inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name, + inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name, source); - inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name, + inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name, source); - fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name); - fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name); + fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie); + fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie); if (target) { inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL); @@ -104,7 +105,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); - fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0); } audit_inode_child(new_name, moved, new_dir); } @@ -138,7 +139,7 @@ static inline void fsnotify_inoderemove(struct inode *inode) inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(inode); - fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL, 0); __fsnotify_inode_delete(inode); } @@ -151,7 +152,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); - fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name); + fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); } /* @@ -166,7 +167,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct fsnotify_link_count(inode); audit_inode_child(new_dentry->d_name.name, new_dentry, dir); - fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name); + fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name, 0); } /* @@ -180,7 +181,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); - fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name); + fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); } /* @@ -197,7 +198,7 @@ static inline void fsnotify_access(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -214,7 +215,7 @@ static inline void fsnotify_modify(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -231,7 +232,7 @@ static inline void fsnotify_open(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -250,7 +251,7 @@ static inline void fsnotify_close(struct file *file) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL); + fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0); } /* @@ -267,7 +268,7 @@ static inline void fsnotify_xattr(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -303,7 +304,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 52692f405890..b78b5573d227 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -193,6 +193,7 @@ struct fsnotify_event { atomic_t refcnt; /* how many groups still are using/need to send this event */ __u32 mask; /* the type of access, bitwise OR for FS_* event types */ + u32 sync_cookie; /* used to corrolate events, namely inotify mv events */ char *file_name; size_t name_len; }; @@ -227,9 +228,11 @@ struct fsnotify_mark_entry { /* called from the vfs helpers */ /* main fsnotify call to send events */ -extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *name); +extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + const char *name, u32 cookie); extern void __fsnotify_parent(struct dentry *dentry, __u32 mask); extern void __fsnotify_inode_delete(struct inode *inode); +extern u32 fsnotify_get_cookie(void); static inline int fsnotify_inode_watches_children(struct inode *inode) { @@ -322,12 +325,13 @@ extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, - void *data, int data_is, const char *name); + void *data, int data_is, const char *name, + u32 cookie); #else static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, - const char *name); + const char *name, u32 cookie) {} static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask) @@ -342,6 +346,11 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) {} +static inline u32 fsnotify_get_cookie(void) +{ + return 0; +} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ -- cgit v1.2.3 From e4aff117368cfdd3567ee41844d216d079b55173 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:50 -0400 Subject: fsnotify: allow groups to add private data to events inotify needs per group information attached to events. This patch allows groups to attach private information and implements a callback so that information can be freed when an event is being destroyed. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- include/linux/fsnotify_backend.h | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index b78b5573d227..efdf9e442d86 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -63,6 +63,7 @@ struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark_entry; +struct fsnotify_event_private_data; /* * Each group much define these ops. The fsnotify infrastructure will call @@ -81,6 +82,7 @@ struct fsnotify_ops { int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group); + void (*free_event_priv)(struct fsnotify_event_private_data *priv); }; /* @@ -157,6 +159,15 @@ struct fsnotify_event_holder { struct list_head event_list; }; +/* + * Inotify needs to tack data onto an event. This struct lets us later find the + * correct private data of the correct group. + */ +struct fsnotify_event_private_data { + struct fsnotify_group *group; + struct list_head event_list; +}; + /* * all of the information about the original object we want to now send to * a group. If you want to carry more info from the accessing task to the @@ -196,6 +207,8 @@ struct fsnotify_event { u32 sync_cookie; /* used to corrolate events, namely inotify mv events */ char *file_name; size_t name_len; + + struct list_head private_data_list; /* groups can store private data here */ }; /* @@ -294,17 +307,18 @@ extern void fsnotify_put_group(struct fsnotify_group *group); /* take a reference to an event */ extern void fsnotify_get_event(struct fsnotify_event *event); extern void fsnotify_put_event(struct fsnotify_event *event); -/* find private data previously attached to an event */ -extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group, - struct fsnotify_event *event); +/* find private data previously attached to an event and unlink it */ +extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, + struct fsnotify_event *event); /* attach the event to the group notification queue */ -extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event); +extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, + struct fsnotify_event_private_data *priv); /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group); -/* reutnr AND dequeue the first event on the notification queue */ +/* return AND dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group); /* functions used to manipulate the marks attached to inodes */ -- cgit v1.2.3 From 164bc6195139047faaf5ada1278332e99494803b Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:58 -0400 Subject: fsnotify: handle filesystem unmounts with fsnotify marks When an fs is unmounted with an fsnotify mark entry attached to one of its inodes we need to destroy that mark entry and we also (like inotify) send an unmount event. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- include/linux/fsnotify_backend.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index efdf9e442d86..d2c0ee30e618 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -336,6 +336,7 @@ extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry); extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry); extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); +extern void fsnotify_unmount_inodes(struct list_head *list); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, @@ -365,6 +366,9 @@ static inline u32 fsnotify_get_cookie(void) return 0; } +static inline void fsnotify_unmount_inodes(struct list_head *list) +{} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ -- cgit v1.2.3 From 63c882a05416e18de6fb59f7dd6da48f3bbe8273 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:02:01 -0400 Subject: inotify: reimplement inotify using fsnotify Reimplement inotify_user using fsnotify. This should be feature for feature exactly the same as the original inotify_user. This does not make any changes to the in kernel inotify feature used by audit. Those patches (and the eventual removal of in kernel inotify) will come after the new inotify_user proves to be working correctly. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- include/linux/fsnotify_backend.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d2c0ee30e618..44848aa830dc 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -9,6 +9,7 @@ #ifdef __KERNEL__ +#include /* inotify uses this */ #include /* struct inode */ #include #include /* struct path */ @@ -59,6 +60,7 @@ /* listeners that hard code group numbers near the top */ #define DNOTIFY_GROUP_NUM UINT_MAX +#define INOTIFY_GROUP_NUM (DNOTIFY_GROUP_NUM-1) struct fsnotify_group; struct fsnotify_event; @@ -141,6 +143,15 @@ struct fsnotify_group { /* groups can define private fields here or use the void *private */ union { void *private; +#ifdef CONFIG_INOTIFY_USER + struct inotify_group_private_data { + spinlock_t idr_lock; + struct idr idr; + u32 last_wd; + struct fasync_struct *fa; /* async notification */ + struct user_struct *user; + } inotify_data; +#endif }; }; -- cgit v1.2.3 From ff52cc2158b32b3b979ca7802b1fd7c70f36e13c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 11 Jun 2009 11:09:47 -0400 Subject: fsnotify: move events should indicate the event was on a child fsnotify tells its listeners explicitly when an event happened on the given inode verses on the child of the given inode. (see __fsnotify_parent) However, the semantics of fsnotify_move() are such that we deliver events directly to the two parent directories in question (old_dir and new_dir) directly without using the __fsnotify_parent() call. fsnotify should be adding FS_EVENT_ON_CHILD for the notifications to these parents. Signed-off-by: Eric Paris --- include/linux/fsnotify.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index c25b39ddd62a..936f9aa8bb97 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -71,12 +71,11 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, struct inode *source = moved->d_inode; u32 in_cookie = inotify_get_cookie(); u32 fs_cookie = fsnotify_get_cookie(); - __u32 old_dir_mask = 0; - __u32 new_dir_mask = 0; + __u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM); + __u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO); - if (old_dir == new_dir) { - old_dir_mask = FS_DN_RENAME; - } + if (old_dir == new_dir) + old_dir_mask |= FS_DN_RENAME; if (isdir) { isdir = IN_ISDIR; @@ -84,9 +83,6 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, new_dir_mask |= FS_IN_ISDIR; } - old_dir_mask |= FS_MOVED_FROM; - new_dir_mask |= FS_MOVED_TO; - inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name, source); inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name, -- cgit v1.2.3 From 63b852a6b67d0820d388b0ecd0da83ccb4048b8d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:24 +0000 Subject: asm-generic: rename termios.h, signal.h and mman.h The existing asm-generic versions are incomplete and included by some architectures. New architectures should be able to use a generic version, so rename the existing files and change all users, which lets us add the new files. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/Kbuild | 4 +- include/asm-generic/mman-common.h | 41 ++++++++++++++++++++ include/asm-generic/mman.h | 41 -------------------- include/asm-generic/signal-defs.h | 28 ++++++++++++++ include/asm-generic/signal.h | 28 -------------- include/asm-generic/termios-base.h | 77 ++++++++++++++++++++++++++++++++++++++ include/asm-generic/termios.h | 77 -------------------------------------- 7 files changed, 148 insertions(+), 148 deletions(-) create mode 100644 include/asm-generic/mman-common.h delete mode 100644 include/asm-generic/mman.h create mode 100644 include/asm-generic/signal-defs.h delete mode 100644 include/asm-generic/signal.h create mode 100644 include/asm-generic/termios-base.h delete mode 100644 include/asm-generic/termios.h (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 4c9932a2503f..460b08d51e2e 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -2,9 +2,9 @@ header-y += errno-base.h header-y += errno.h header-y += fcntl.h header-y += ioctl.h -header-y += mman.h +header-y += mman-common.h header-y += poll.h -header-y += signal.h +header-y += signal-defs.h header-y += statfs.h unifdef-y += int-l64.h diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h new file mode 100644 index 000000000000..3b69ad34189a --- /dev/null +++ b/include/asm-generic/mman-common.h @@ -0,0 +1,41 @@ +#ifndef __ASM_GENERIC_MMAN_COMMON_H +#define __ASM_GENERIC_MMAN_COMMON_H + +/* + Author: Michael S. Tsirkin , Mellanox Technologies Ltd. + Based on: asm-xxx/mman.h +*/ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x10 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x20 /* don't use a file */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_INVALIDATE 2 /* invalidate the caches */ +#define MS_SYNC 4 /* synchronous memory sync */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ + +/* compatibility flags */ +#define MAP_FILE 0 + +#endif /* __ASM_GENERIC_MMAN_COMMON_H */ diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h deleted file mode 100644 index 5e3dde2ee5ad..000000000000 --- a/include/asm-generic/mman.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _ASM_GENERIC_MMAN_H -#define _ASM_GENERIC_MMAN_H - -/* - Author: Michael S. Tsirkin , Mellanox Technologies Ltd. - Based on: asm-xxx/mman.h -*/ - -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ - -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - -#define MADV_NORMAL 0 /* no further special treatment */ -#define MADV_RANDOM 1 /* expect random page references */ -#define MADV_SEQUENTIAL 2 /* expect sequential page references */ -#define MADV_WILLNEED 3 /* will need these pages */ -#define MADV_DONTNEED 4 /* don't need these pages */ - -/* common parameters: try to keep these consistent across architectures */ -#define MADV_REMOVE 9 /* remove these pages & resources */ -#define MADV_DONTFORK 10 /* don't inherit across fork */ -#define MADV_DOFORK 11 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_FILE 0 - -#endif diff --git a/include/asm-generic/signal-defs.h b/include/asm-generic/signal-defs.h new file mode 100644 index 000000000000..00f95df54297 --- /dev/null +++ b/include/asm-generic/signal-defs.h @@ -0,0 +1,28 @@ +#ifndef __ASM_GENERIC_SIGNAL_DEFS_H +#define __ASM_GENERIC_SIGNAL_DEFS_H + +#include + +#ifndef SIG_BLOCK +#define SIG_BLOCK 0 /* for blocking signals */ +#endif +#ifndef SIG_UNBLOCK +#define SIG_UNBLOCK 1 /* for unblocking signals */ +#endif +#ifndef SIG_SETMASK +#define SIG_SETMASK 2 /* for setting the signal mask */ +#endif + +#ifndef __ASSEMBLY__ +typedef void __signalfn_t(int); +typedef __signalfn_t __user *__sighandler_t; + +typedef void __restorefn_t(void); +typedef __restorefn_t __user *__sigrestore_t; + +#define SIG_DFL ((__force __sighandler_t)0) /* default signal handling */ +#define SIG_IGN ((__force __sighandler_t)1) /* ignore signal */ +#define SIG_ERR ((__force __sighandler_t)-1) /* error return from signal */ +#endif + +#endif /* __ASM_GENERIC_SIGNAL_DEFS_H */ diff --git a/include/asm-generic/signal.h b/include/asm-generic/signal.h deleted file mode 100644 index dae1d8720076..000000000000 --- a/include/asm-generic/signal.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef __ASM_GENERIC_SIGNAL_H -#define __ASM_GENERIC_SIGNAL_H - -#include - -#ifndef SIG_BLOCK -#define SIG_BLOCK 0 /* for blocking signals */ -#endif -#ifndef SIG_UNBLOCK -#define SIG_UNBLOCK 1 /* for unblocking signals */ -#endif -#ifndef SIG_SETMASK -#define SIG_SETMASK 2 /* for setting the signal mask */ -#endif - -#ifndef __ASSEMBLY__ -typedef void __signalfn_t(int); -typedef __signalfn_t __user *__sighandler_t; - -typedef void __restorefn_t(void); -typedef __restorefn_t __user *__sigrestore_t; - -#define SIG_DFL ((__force __sighandler_t)0) /* default signal handling */ -#define SIG_IGN ((__force __sighandler_t)1) /* ignore signal */ -#define SIG_ERR ((__force __sighandler_t)-1) /* error return from signal */ -#endif - -#endif /* __ASM_GENERIC_SIGNAL_H */ diff --git a/include/asm-generic/termios-base.h b/include/asm-generic/termios-base.h new file mode 100644 index 000000000000..0a769feb22b0 --- /dev/null +++ b/include/asm-generic/termios-base.h @@ -0,0 +1,77 @@ +/* termios.h: generic termios/termio user copying/translation + */ + +#ifndef _ASM_GENERIC_TERMIOS_BASE_H +#define _ASM_GENERIC_TERMIOS_BASE_H + +#include + +#ifndef __ARCH_TERMIO_GETPUT + +/* + * Translate a "termio" structure into a "termios". Ugh. + */ +static inline int user_termio_to_kernel_termios(struct ktermios *termios, + struct termio __user *termio) +{ + unsigned short tmp; + + if (get_user(tmp, &termio->c_iflag) < 0) + goto fault; + termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp; + + if (get_user(tmp, &termio->c_oflag) < 0) + goto fault; + termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp; + + if (get_user(tmp, &termio->c_cflag) < 0) + goto fault; + termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp; + + if (get_user(tmp, &termio->c_lflag) < 0) + goto fault; + termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp; + + if (get_user(termios->c_line, &termio->c_line) < 0) + goto fault; + + if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0) + goto fault; + + return 0; + + fault: + return -EFAULT; +} + +/* + * Translate a "termios" structure into a "termio". Ugh. + */ +static inline int kernel_termios_to_user_termio(struct termio __user *termio, + struct ktermios *termios) +{ + if (put_user(termios->c_iflag, &termio->c_iflag) < 0 || + put_user(termios->c_oflag, &termio->c_oflag) < 0 || + put_user(termios->c_cflag, &termio->c_cflag) < 0 || + put_user(termios->c_lflag, &termio->c_lflag) < 0 || + put_user(termios->c_line, &termio->c_line) < 0 || + copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0) + return -EFAULT; + + return 0; +} + +#ifndef user_termios_to_kernel_termios +#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios)) +#endif + +#ifndef kernel_termios_to_user_termios +#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios)) +#endif + +#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) +#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) + +#endif /* __ARCH_TERMIO_GETPUT */ + +#endif /* _ASM_GENERIC_TERMIOS_BASE_H */ diff --git a/include/asm-generic/termios.h b/include/asm-generic/termios.h deleted file mode 100644 index 7d39ecc92d94..000000000000 --- a/include/asm-generic/termios.h +++ /dev/null @@ -1,77 +0,0 @@ -/* termios.h: generic termios/termio user copying/translation - */ - -#ifndef _ASM_GENERIC_TERMIOS_H -#define _ASM_GENERIC_TERMIOS_H - -#include - -#ifndef __ARCH_TERMIO_GETPUT - -/* - * Translate a "termio" structure into a "termios". Ugh. - */ -static inline int user_termio_to_kernel_termios(struct ktermios *termios, - struct termio __user *termio) -{ - unsigned short tmp; - - if (get_user(tmp, &termio->c_iflag) < 0) - goto fault; - termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp; - - if (get_user(tmp, &termio->c_oflag) < 0) - goto fault; - termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp; - - if (get_user(tmp, &termio->c_cflag) < 0) - goto fault; - termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp; - - if (get_user(tmp, &termio->c_lflag) < 0) - goto fault; - termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp; - - if (get_user(termios->c_line, &termio->c_line) < 0) - goto fault; - - if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0) - goto fault; - - return 0; - - fault: - return -EFAULT; -} - -/* - * Translate a "termios" structure into a "termio". Ugh. - */ -static inline int kernel_termios_to_user_termio(struct termio __user *termio, - struct ktermios *termios) -{ - if (put_user(termios->c_iflag, &termio->c_iflag) < 0 || - put_user(termios->c_oflag, &termio->c_oflag) < 0 || - put_user(termios->c_cflag, &termio->c_cflag) < 0 || - put_user(termios->c_lflag, &termio->c_lflag) < 0 || - put_user(termios->c_line, &termio->c_line) < 0 || - copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0) - return -EFAULT; - - return 0; -} - -#ifndef user_termios_to_kernel_termios -#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios)) -#endif - -#ifndef kernel_termios_to_user_termios -#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios)) -#endif - -#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) -#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) - -#endif /* __ARCH_TERMIO_GETPUT */ - -#endif /* _ASM_GENERIC_TERMIOS_H */ -- cgit v1.2.3 From c31ae4bb4a9fa4606a74c0a4fb61b74f804e861e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:25 +0000 Subject: asm-generic: introduce asm/bitsperlong.h This provides a reliable way for asm-generic/types.h and other files to find out if it is running on a 32 or 64 bit platform. We cannot use CONFIG_64BIT for this in headers that are included from user space because CONFIG symbols are not available there. We also cannot do it inside of asm/types.h because some headers need the word size but cannot include types.h. The solution is to introduce a new header that defines both __BITS_PER_LONG for user space and BITS_PER_LONG for usage in the kernel. The asm-generic version falls back to 32 bit unless the architecture overrides it, which I did for all 64 bit platforms. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/Kbuild | 1 + include/asm-generic/Kbuild.asm | 1 + include/asm-generic/bitsperlong.h | 32 ++++++++++++++++++++++++++++++++ include/asm-generic/int-l64.h | 2 ++ include/asm-generic/int-ll64.h | 2 ++ 5 files changed, 38 insertions(+) create mode 100644 include/asm-generic/bitsperlong.h (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 460b08d51e2e..cbb437875f5c 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -1,3 +1,4 @@ +header-y += bitsperlong.h header-y += errno-base.h header-y += errno.h header-y += fcntl.h diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index 70d185534b9d..290910e4ede4 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -9,6 +9,7 @@ unifdef-y += a.out.h endif unifdef-y += auxvec.h unifdef-y += byteorder.h +unifdef-y += bitsperlong.h unifdef-y += errno.h unifdef-y += fcntl.h unifdef-y += ioctl.h diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h new file mode 100644 index 000000000000..4ae54e07de83 --- /dev/null +++ b/include/asm-generic/bitsperlong.h @@ -0,0 +1,32 @@ +#ifndef __ASM_GENERIC_BITS_PER_LONG +#define __ASM_GENERIC_BITS_PER_LONG + +/* + * There seems to be no way of detecting this automatically from user + * space, so 64 bit architectures should override this in their + * bitsperlong.h. In particular, an architecture that supports + * both 32 and 64 bit user space must not rely on CONFIG_64BIT + * to decide it, but rather check a compiler provided macro. + */ +#ifndef __BITS_PER_LONG +#define __BITS_PER_LONG 32 +#endif + +#ifdef __KERNEL__ + +#ifdef CONFIG_64BIT +#define BITS_PER_LONG 64 +#else +#define BITS_PER_LONG 32 +#endif /* CONFIG_64BIT */ + +/* + * FIXME: The check currently breaks x86-64 build, so it's + * temporarily disabled. Please fix x86-64 and reenable + */ +#if 0 && BITS_PER_LONG != __BITS_PER_LONG +#error Inconsistent word size. Check asm/bitsperlong.h +#endif + +#endif /* __KERNEL__ */ +#endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/include/asm-generic/int-l64.h b/include/asm-generic/int-l64.h index 2af9b75d77db..1ca3efc976cc 100644 --- a/include/asm-generic/int-l64.h +++ b/include/asm-generic/int-l64.h @@ -8,6 +8,8 @@ #ifndef _ASM_GENERIC_INT_L64_H #define _ASM_GENERIC_INT_L64_H +#include + #ifndef __ASSEMBLY__ /* * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h index f9bc9ac29b36..f394147c0739 100644 --- a/include/asm-generic/int-ll64.h +++ b/include/asm-generic/int-ll64.h @@ -8,6 +8,8 @@ #ifndef _ASM_GENERIC_INT_LL64_H #define _ASM_GENERIC_INT_LL64_H +#include + #ifndef __ASSEMBLY__ /* * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the -- cgit v1.2.3 From 2864d32be31a20a4617e37a857dd9915a57e2efb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:26 +0000 Subject: asm-generic: add generic sysv ipc headers The ipc64 data structures were originally meant to be architecture specific so that each architecture could add their own optimizations for padding. In the end, most of them just copied the x86 version, and most got that wrong. UClibc expects the x86 anyway, so we might just declare that the default and get rid of the extra copies. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/Kbuild | 5 ++++ include/asm-generic/ipcbuf.h | 34 ++++++++++++++++++++++++ include/asm-generic/msgbuf.h | 47 +++++++++++++++++++++++++++++++++ include/asm-generic/sembuf.h | 38 +++++++++++++++++++++++++++ include/asm-generic/shmbuf.h | 59 ++++++++++++++++++++++++++++++++++++++++++ include/asm-generic/shmparam.h | 6 +++++ 6 files changed, 189 insertions(+) create mode 100644 include/asm-generic/ipcbuf.h create mode 100644 include/asm-generic/msgbuf.h create mode 100644 include/asm-generic/sembuf.h create mode 100644 include/asm-generic/shmbuf.h create mode 100644 include/asm-generic/shmparam.h (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index cbb437875f5c..fd7a9c49df4e 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -3,8 +3,13 @@ header-y += errno-base.h header-y += errno.h header-y += fcntl.h header-y += ioctl.h +header-y += ipcbuf.h header-y += mman-common.h +header-y += msgbuf.h header-y += poll.h +header-y += sembuf.h +header-y += shmbuf.h +header-y += shmparam.h header-y += signal-defs.h header-y += statfs.h diff --git a/include/asm-generic/ipcbuf.h b/include/asm-generic/ipcbuf.h new file mode 100644 index 000000000000..76982b2a1b58 --- /dev/null +++ b/include/asm-generic/ipcbuf.h @@ -0,0 +1,34 @@ +#ifndef __ASM_GENERIC_IPCBUF_H +#define __ASM_GENERIC_IPCBUF_H + +/* + * The generic ipc64_perm structure: + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * ipc64_perm was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * Pad space is left for: + * - 32-bit mode_t on architectures that only had 16 bit + * - 32-bit seq + * - 2 miscellaneous 32-bit values + */ + +struct ipc64_perm { + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; + __kernel_mode_t mode; + /* pad if mode_t is u16: */ + unsigned char __pad1[4 - sizeof(__kernel_mode_t)]; + unsigned short seq; + unsigned short __pad2; + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* __ASM_GENERIC_IPCBUF_H */ diff --git a/include/asm-generic/msgbuf.h b/include/asm-generic/msgbuf.h new file mode 100644 index 000000000000..aec850d9159e --- /dev/null +++ b/include/asm-generic/msgbuf.h @@ -0,0 +1,47 @@ +#ifndef __ASM_GENERIC_MSGBUF_H +#define __ASM_GENERIC_MSGBUF_H + +#include +/* + * generic msqid64_ds structure. + * + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * msqid64_ds was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * 64 bit architectures typically define a 64 bit __kernel_time_t, + * so they do not need the first three padding words. + * On big-endian systems, the padding is in the wrong place. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused1; +#endif + __kernel_time_t msg_rtime; /* last msgrcv time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused2; +#endif + __kernel_time_t msg_ctime; /* last change time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused3; +#endif + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused4; + unsigned long __unused5; +}; + +#endif /* __ASM_GENERIC_MSGBUF_H */ diff --git a/include/asm-generic/sembuf.h b/include/asm-generic/sembuf.h new file mode 100644 index 000000000000..4cb2c13e5090 --- /dev/null +++ b/include/asm-generic/sembuf.h @@ -0,0 +1,38 @@ +#ifndef __ASM_GENERIC_SEMBUF_H +#define __ASM_GENERIC_SEMBUF_H + +#include + +/* + * The semid64_ds structure for x86 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * semid64_ds was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * 64 bit architectures typically define a 64 bit __kernel_time_t, + * so they do not need the first two padding words. + * On big-endian systems, the padding is in the wrong place. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused1; +#endif + __kernel_time_t sem_ctime; /* last change time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused2; +#endif + unsigned long sem_nsems; /* no. of semaphores in array */ + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* __ASM_GENERIC_SEMBUF_H */ diff --git a/include/asm-generic/shmbuf.h b/include/asm-generic/shmbuf.h new file mode 100644 index 000000000000..5768fa60ac82 --- /dev/null +++ b/include/asm-generic/shmbuf.h @@ -0,0 +1,59 @@ +#ifndef __ASM_GENERIC_SHMBUF_H +#define __ASM_GENERIC_SHMBUF_H + +#include + +/* + * The shmid64_ds structure for x86 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * shmid64_ds was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * 64 bit architectures typically define a 64 bit __kernel_time_t, + * so they do not need the first two padding words. + * On big-endian systems, the padding is in the wrong place. + * + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused1; +#endif + __kernel_time_t shm_dtime; /* last detach time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused2; +#endif + __kernel_time_t shm_ctime; /* last change time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused3; +#endif + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. of current attaches */ + unsigned long __unused4; + unsigned long __unused5; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* __ASM_GENERIC_SHMBUF_H */ diff --git a/include/asm-generic/shmparam.h b/include/asm-generic/shmparam.h new file mode 100644 index 000000000000..51a3852de733 --- /dev/null +++ b/include/asm-generic/shmparam.h @@ -0,0 +1,6 @@ +#ifndef __ASM_GENERIC_SHMPARAM_H +#define __ASM_GENERIC_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _ASM_GENERIC_SHMPARAM_H */ -- cgit v1.2.3 From 6103ec56c65c33774c7c38652c8204120c3c7519 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:27 +0000 Subject: asm-generic: add generic ABI headers These header files are typically copied from an existing architecture into any new one, slightly modified and then remain untouched until the end of time in the name of ABI stability. To make it easier for future architectures, provide a sane generic version here. In cases where multiple architectures already use identical code, I used the most common version. In cases like stat.h that are more or less broken everywhere, I provide a version that is meant to be ideal for new architectures. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/Kbuild | 15 +++ include/asm-generic/auxvec.h | 8 ++ include/asm-generic/ioctls.h | 110 +++++++++++++++++++++ include/asm-generic/mman.h | 18 ++++ include/asm-generic/param.h | 24 +++++ include/asm-generic/posix_types.h | 165 +++++++++++++++++++++++++++++++ include/asm-generic/setup.h | 6 ++ include/asm-generic/signal.h | 131 +++++++++++++++++++++++++ include/asm-generic/socket.h | 63 ++++++++++++ include/asm-generic/sockios.h | 13 +++ include/asm-generic/stat.h | 72 ++++++++++++++ include/asm-generic/swab.h | 18 ++++ include/asm-generic/termbits.h | 198 ++++++++++++++++++++++++++++++++++++++ include/asm-generic/termios.h | 154 +++++++++++++++++++++++++++++ include/asm-generic/types.h | 42 ++++++++ include/asm-generic/ucontext.h | 12 +++ 16 files changed, 1049 insertions(+) create mode 100644 include/asm-generic/auxvec.h create mode 100644 include/asm-generic/ioctls.h create mode 100644 include/asm-generic/mman.h create mode 100644 include/asm-generic/param.h create mode 100644 include/asm-generic/posix_types.h create mode 100644 include/asm-generic/setup.h create mode 100644 include/asm-generic/signal.h create mode 100644 include/asm-generic/socket.h create mode 100644 include/asm-generic/sockios.h create mode 100644 include/asm-generic/stat.h create mode 100644 include/asm-generic/swab.h create mode 100644 include/asm-generic/termbits.h create mode 100644 include/asm-generic/termios.h create mode 100644 include/asm-generic/types.h create mode 100644 include/asm-generic/ucontext.h (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index fd7a9c49df4e..11a78b8e2fc9 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -1,17 +1,32 @@ +header-y += auxvec.h header-y += bitsperlong.h header-y += errno-base.h header-y += errno.h header-y += fcntl.h header-y += ioctl.h +header-y += ioctls.h header-y += ipcbuf.h header-y += mman-common.h +header-y += mman.h header-y += msgbuf.h +header-y += param.h header-y += poll.h +header-y += posix_types.h header-y += sembuf.h +header-y += setup.h header-y += shmbuf.h header-y += shmparam.h header-y += signal-defs.h +header-y += signal.h +header-y += socket.h +header-y += sockios.h +header-y += stat.h header-y += statfs.h +header-y += swab.h +header-y += termbits.h +header-y += termios.h +header-y += types.h +header-y += ucontext.h unifdef-y += int-l64.h unifdef-y += int-ll64.h diff --git a/include/asm-generic/auxvec.h b/include/asm-generic/auxvec.h new file mode 100644 index 000000000000..b99573b0ad12 --- /dev/null +++ b/include/asm-generic/auxvec.h @@ -0,0 +1,8 @@ +#ifndef __ASM_GENERIC_AUXVEC_H +#define __ASM_GENERIC_AUXVEC_H +/* + * Not all architectures need their own auxvec.h, the most + * common definitions are already in linux/auxvec.h. + */ + +#endif /* __ASM_GENERIC_AUXVEC_H */ diff --git a/include/asm-generic/ioctls.h b/include/asm-generic/ioctls.h new file mode 100644 index 000000000000..a799e20a769e --- /dev/null +++ b/include/asm-generic/ioctls.h @@ -0,0 +1,110 @@ +#ifndef __ASM_GENERIC_IOCTLS_H +#define __ASM_GENERIC_IOCTLS_H + +#include + +/* + * These are the most common definitions for tty ioctl numbers. + * Most of them do not use the recommended _IOC(), but there is + * probably some source code out there hardcoding the number, + * so we might as well use them for all new platforms. + * + * The architectures that use different values here typically + * try to be compatible with some Unix variants for the same + * architecture. + */ + +/* 0x54 is just a magic number to make these relatively unique ('T') */ + +#define TCGETS 0x5401 +#define TCSETS 0x5402 +#define TCSETSW 0x5403 +#define TCSETSF 0x5404 +#define TCGETA 0x5405 +#define TCSETA 0x5406 +#define TCSETAW 0x5407 +#define TCSETAF 0x5408 +#define TCSBRK 0x5409 +#define TCXONC 0x540A +#define TCFLSH 0x540B +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E +#define TIOCGPGRP 0x540F +#define TIOCSPGRP 0x5410 +#define TIOCOUTQ 0x5411 +#define TIOCSTI 0x5412 +#define TIOCGWINSZ 0x5413 +#define TIOCSWINSZ 0x5414 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define FIONREAD 0x541B +#define TIOCINQ FIONREAD +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +#define FIONBIO 0x5421 +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TCGETS2 _IOR('T', 0x2A, struct termios2) +#define TCSETS2 _IOW('T', 0x2B, struct termios2) +#define TCSETSW2 _IOW('T', 0x2C, struct termios2) +#define TCSETSF2 _IOW('T', 0x2D, struct termios2) +#define TIOCGRS485 0x542E +#define TIOCSRS485 0x542F +#define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ +#define TCGETX 0x5432 /* SYS5 TCGETX compatibility */ +#define TCSETX 0x5433 +#define TCSETXF 0x5434 +#define TCSETXW 0x5435 + +#define FIONCLEX 0x5450 +#define FIOCLEX 0x5451 +#define FIOASYNC 0x5452 +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ + +/* + * some architectures define FIOQSIZE as 0x545E, which is used for + * TIOCGHAYESESP on others + */ +#ifndef FIOQSIZE +# define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ +# define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +# define FIOQSIZE 0x5460 +#endif + +/* Used for packet mode */ +#define TIOCPKT_DATA 0 +#define TIOCPKT_FLUSHREAD 1 +#define TIOCPKT_FLUSHWRITE 2 +#define TIOCPKT_STOP 4 +#define TIOCPKT_START 8 +#define TIOCPKT_NOSTOP 16 +#define TIOCPKT_DOSTOP 32 + +#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ + +#endif /* __ASM_GENERIC_IOCTLS_H */ diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h new file mode 100644 index 000000000000..7cab4de2bca6 --- /dev/null +++ b/include/asm-generic/mman.h @@ -0,0 +1,18 @@ +#ifndef __ASM_GENERIC_MMAN_H +#define __ASM_GENERIC_MMAN_H + +#include + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + +#endif /* __ASM_GENERIC_MMAN_H */ diff --git a/include/asm-generic/param.h b/include/asm-generic/param.h new file mode 100644 index 000000000000..cdf8251bfb6c --- /dev/null +++ b/include/asm-generic/param.h @@ -0,0 +1,24 @@ +#ifndef __ASM_GENERIC_PARAM_H +#define __ASM_GENERIC_PARAM_H + +#ifdef __KERNEL__ +# define HZ CONFIG_HZ /* Internal kernel timer frequency */ +# define USER_HZ 100 /* some user interfaces are */ +# define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */ +#endif + +#ifndef HZ +#define HZ 100 +#endif + +#ifndef EXEC_PAGESIZE +#define EXEC_PAGESIZE 4096 +#endif + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#endif /* __ASM_GENERIC_PARAM_H */ diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h new file mode 100644 index 000000000000..3dab00860e71 --- /dev/null +++ b/include/asm-generic/posix_types.h @@ -0,0 +1,165 @@ +#ifndef __ASM_GENERIC_POSIX_TYPES_H +#define __ASM_GENERIC_POSIX_TYPES_H + +#include +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. + * + * First the types that are often defined in different ways across + * architectures, so that you can override them. + */ + +#ifndef __kernel_ino_t +typedef unsigned long __kernel_ino_t; +#endif + +#ifndef __kernel_mode_t +typedef unsigned int __kernel_mode_t; +#endif + +#ifndef __kernel_nlink_t +typedef unsigned long __kernel_nlink_t; +#endif + +#ifndef __kernel_pid_t +typedef int __kernel_pid_t; +#endif + +#ifndef __kernel_ipc_pid_t +typedef int __kernel_ipc_pid_t; +#endif + +#ifndef __kernel_uid_t +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +#endif + +#ifndef __kernel_suseconds_t +typedef long __kernel_suseconds_t; +#endif + +#ifndef __kernel_daddr_t +typedef int __kernel_daddr_t; +#endif + +#ifndef __kernel_uid32_t +typedef __kernel_uid_t __kernel_uid32_t; +typedef __kernel_gid_t __kernel_gid32_t; +#endif + +#ifndef __kernel_old_uid_t +typedef __kernel_uid_t __kernel_old_uid_t; +typedef __kernel_gid_t __kernel_old_gid_t; +#endif + +#ifndef __kernel_old_dev_t +typedef unsigned int __kernel_old_dev_t; +#endif + +/* + * Most 32 bit architectures use "unsigned int" size_t, + * and all 64 bit architectures use "unsigned long" size_t. + */ +#ifndef __kernel_size_t +#if __BITS_PER_LONG != 64 +typedef unsigned int __kernel_size_t; +typedef int __kernel_ssize_t; +typedef int __kernel_ptrdiff_t; +#else +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +#endif +#endif + +/* + * anything below here should be completely generic + */ +typedef long __kernel_off_t; +typedef long long __kernel_loff_t; +typedef long __kernel_time_t; +typedef long __kernel_clock_t; +typedef int __kernel_timer_t; +typedef int __kernel_clockid_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; + +typedef struct { + int val[2]; +} __kernel_fsid_t; + +#ifdef __KERNEL__ + +#undef __FD_SET +static inline void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] |= (1UL<<__rem); +} + +#undef __FD_CLR +static inline void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem); +} + +#undef __FD_ISSET +static inline int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0; +} + +/* + * This will unroll the loop for the normal constant case (8 ints, + * for a 256-bit fd_set) + */ +#undef __FD_ZERO +static inline void __FD_ZERO(__kernel_fd_set *__p) +{ + unsigned long *__tmp = __p->fds_bits; + int __i; + + if (__builtin_constant_p(__FDSET_LONGS)) { + switch (__FDSET_LONGS) { + case 16: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + __tmp[ 8] = 0; __tmp[ 9] = 0; + __tmp[10] = 0; __tmp[11] = 0; + __tmp[12] = 0; __tmp[13] = 0; + __tmp[14] = 0; __tmp[15] = 0; + return; + + case 8: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + return; + + case 4: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + return; + } + } + __i = __FDSET_LONGS; + while (__i) { + __i--; + *__tmp = 0; + __tmp++; + } +} + +#endif /* __KERNEL__ */ + +#endif /* __ASM_GENERIC_POSIX_TYPES_H */ diff --git a/include/asm-generic/setup.h b/include/asm-generic/setup.h new file mode 100644 index 000000000000..6fc26a51003c --- /dev/null +++ b/include/asm-generic/setup.h @@ -0,0 +1,6 @@ +#ifndef __ASM_GENERIC_SETUP_H +#define __ASM_GENERIC_SETUP_H + +#define COMMAND_LINE_SIZE 512 + +#endif /* __ASM_GENERIC_SETUP_H */ diff --git a/include/asm-generic/signal.h b/include/asm-generic/signal.h new file mode 100644 index 000000000000..555c0aee8a47 --- /dev/null +++ b/include/asm-generic/signal.h @@ -0,0 +1,131 @@ +#ifndef __ASM_GENERIC_SIGNAL_H +#define __ASM_GENERIC_SIGNAL_H + +#include + +#define _NSIG 64 +#define _NSIG_BPW __BITS_PER_LONG +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#ifndef SIGRTMAX +#define SIGRTMAX _NSIG +#endif + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 +#define SA_SIGINFO 0x00000004 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +/* + * New architectures should not define the obsolete + * SA_RESTORER 0x04000000 + */ + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#ifndef __ASSEMBLY__ +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +/* not actually used, but required for linux/syscalls.h */ +typedef unsigned long old_sigset_t; + +#include + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; +#ifdef SA_RESTORER + __sigrestore_t sa_restorer; +#endif + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +}; + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#ifdef __KERNEL__ + +#include +#undef __HAVE_ARCH_SIG_BITOPS + +#define ptrace_signal_deliver(regs, cookie) do { } while (0) + +#endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_GENERIC_SIGNAL_H */ diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h new file mode 100644 index 000000000000..5d79e409241c --- /dev/null +++ b/include/asm-generic/socket.h @@ -0,0 +1,63 @@ +#ifndef __ASM_GENERIC_SOCKET_H +#define __ASM_GENERIC_SOCKET_H + +#include + +/* For setsockopt(2) */ +#define SOL_SOCKET 1 + +#define SO_DEBUG 1 +#define SO_REUSEADDR 2 +#define SO_TYPE 3 +#define SO_ERROR 4 +#define SO_DONTROUTE 5 +#define SO_BROADCAST 6 +#define SO_SNDBUF 7 +#define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 +#define SO_KEEPALIVE 9 +#define SO_OOBINLINE 10 +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_LINGER 13 +#define SO_BSDCOMPAT 14 +/* To add :#define SO_REUSEPORT 15 */ + +#ifndef SO_PASSCRED /* powerpc only differs in these */ +#define SO_PASSCRED 16 +#define SO_PEERCRED 17 +#define SO_RCVLOWAT 18 +#define SO_SNDLOWAT 19 +#define SO_RCVTIMEO 20 +#define SO_SNDTIMEO 21 +#endif + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define SO_SECURITY_ENCRYPTION_NETWORK 24 + +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_ACCEPTCONN 30 + +#define SO_PEERSEC 31 +#define SO_PASSSEC 34 +#define SO_TIMESTAMPNS 35 +#define SCM_TIMESTAMPNS SO_TIMESTAMPNS + +#define SO_MARK 36 + +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + +#endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/asm-generic/sockios.h b/include/asm-generic/sockios.h new file mode 100644 index 000000000000..9a61a369b901 --- /dev/null +++ b/include/asm-generic/sockios.h @@ -0,0 +1,13 @@ +#ifndef __ASM_GENERIC_SOCKIOS_H +#define __ASM_GENERIC_SOCKIOS_H + +/* Socket-level I/O control calls. */ +#define FIOSETOWN 0x8901 +#define SIOCSPGRP 0x8902 +#define FIOGETOWN 0x8903 +#define SIOCGPGRP 0x8904 +#define SIOCATMARK 0x8905 +#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ + +#endif /* __ASM_GENERIC_SOCKIOS_H */ diff --git a/include/asm-generic/stat.h b/include/asm-generic/stat.h new file mode 100644 index 000000000000..47e64170305d --- /dev/null +++ b/include/asm-generic/stat.h @@ -0,0 +1,72 @@ +#ifndef __ASM_GENERIC_STAT_H +#define __ASM_GENERIC_STAT_H + +/* + * Everybody gets this wrong and has to stick with it for all + * eternity. Hopefully, this version gets used by new architectures + * so they don't fall into the same traps. + * + * stat64 is copied from powerpc64, with explicit padding added. + * stat is the same structure layout on 64-bit, without the 'long long' + * types. + * + * By convention, 64 bit architectures use the stat interface, while + * 32 bit architectures use the stat64 interface. Note that we don't + * provide an __old_kernel_stat here, which new architecture should + * not have to start with. + */ + +#include + +#define STAT_HAVE_NSEC 1 + +struct stat { + unsigned long st_dev; /* Device. */ + unsigned long st_ino; /* File serial number. */ + unsigned int st_mode; /* File mode. */ + unsigned int st_nlink; /* Link count. */ + unsigned int st_uid; /* User ID of the file's owner. */ + unsigned int st_gid; /* Group ID of the file's group. */ + unsigned long st_rdev; /* Device number, if device. */ + unsigned long __pad1; + long st_size; /* Size of file, in bytes. */ + int st_blksize; /* Optimal block size for I/O. */ + int __pad2; + long st_blocks; /* Number 512-byte blocks allocated. */ + int st_atime; /* Time of last access. */ + unsigned int st_atime_nsec; + int st_mtime; /* Time of last modification. */ + unsigned int st_mtime_nsec; + int st_ctime; /* Time of last status change. */ + unsigned int st_ctime_nsec; + unsigned int __unused4; + unsigned int __unused5; +}; + +#if __BITS_PER_LONG != 64 +/* This matches struct stat64 in glibc2.1. Only used for 32 bit. */ +struct stat64 { + unsigned long long st_dev; /* Device. */ + unsigned long long st_ino; /* File serial number. */ + unsigned int st_mode; /* File mode. */ + unsigned int st_nlink; /* Link count. */ + unsigned int st_uid; /* User ID of the file's owner. */ + unsigned int st_gid; /* Group ID of the file's group. */ + unsigned long long st_rdev; /* Device number, if device. */ + unsigned long long __pad1; + long long st_size; /* Size of file, in bytes. */ + int st_blksize; /* Optimal block size for I/O. */ + int __pad2; + long long st_blocks; /* Number 512-byte blocks allocated. */ + int st_atime; /* Time of last access. */ + unsigned int st_atime_nsec; + int st_mtime; /* Time of last modification. */ + unsigned int st_mtime_nsec; + int st_ctime; /* Time of last status change. */ + unsigned int st_ctime_nsec; + unsigned int __unused4; + unsigned int __unused5; +}; +#endif + +#endif /* __ASM_GENERIC_STAT_H */ diff --git a/include/asm-generic/swab.h b/include/asm-generic/swab.h new file mode 100644 index 000000000000..a8e9029d9eba --- /dev/null +++ b/include/asm-generic/swab.h @@ -0,0 +1,18 @@ +#ifndef _ASM_GENERIC_SWAB_H +#define _ASM_GENERIC_SWAB_H + +#include + +/* + * 32 bit architectures typically (but not always) want to + * set __SWAB_64_THRU_32__. In user space, this is only + * valid if the compiler supports 64 bit data types. + */ + +#if __BITS_PER_LONG == 32 +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__) +#define __SWAB_64_THRU_32__ +#endif +#endif + +#endif /* _ASM_GENERIC_SWAB_H */ diff --git a/include/asm-generic/termbits.h b/include/asm-generic/termbits.h new file mode 100644 index 000000000000..1c9773d48cb0 --- /dev/null +++ b/include/asm-generic/termbits.h @@ -0,0 +1,198 @@ +#ifndef __ASM_GENERIC_TERMBITS_H +#define __ASM_GENERIC_TERMBITS_H + +#include + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ +}; + +struct termios2 { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +struct ktermios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* c_cc characters */ +#define VINTR 0 +#define VQUIT 1 +#define VERASE 2 +#define VKILL 3 +#define VEOF 4 +#define VTIME 5 +#define VMIN 6 +#define VSWTC 7 +#define VSTART 8 +#define VSTOP 9 +#define VSUSP 10 +#define VEOL 11 +#define VREPRINT 12 +#define VDISCARD 13 +#define VWERASE 14 +#define VLNEXT 15 +#define VEOL2 16 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IUCLC 0001000 +#define IXON 0002000 +#define IXANY 0004000 +#define IXOFF 0010000 +#define IMAXBEL 0020000 +#define IUTF8 0040000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define OLCUC 0000002 +#define ONLCR 0000004 +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 +#define OFILL 0000100 +#define OFDEL 0000200 +#define NLDLY 0000400 +#define NL0 0000000 +#define NL1 0000400 +#define CRDLY 0003000 +#define CR0 0000000 +#define CR1 0001000 +#define CR2 0002000 +#define CR3 0003000 +#define TABDLY 0014000 +#define TAB0 0000000 +#define TAB1 0004000 +#define TAB2 0010000 +#define TAB3 0014000 +#define XTABS 0014000 +#define BSDLY 0020000 +#define BS0 0000000 +#define BS1 0020000 +#define VTDLY 0040000 +#define VT0 0000000 +#define VT1 0040000 +#define FFDLY 0100000 +#define FF0 0000000 +#define FF1 0100000 + +/* c_cflag bit meaning */ +#define CBAUD 0010017 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CSIZE 0000060 +#define CS5 0000000 +#define CS6 0000020 +#define CS7 0000040 +#define CS8 0000060 +#define CSTOPB 0000100 +#define CREAD 0000200 +#define PARENB 0000400 +#define PARODD 0001000 +#define HUPCL 0002000 +#define CLOCAL 0004000 +#define CBAUDEX 0010000 +#define BOTHER 0010000 +#define B57600 0010001 +#define B115200 0010002 +#define B230400 0010003 +#define B460800 0010004 +#define B500000 0010005 +#define B576000 0010006 +#define B921600 0010007 +#define B1000000 0010010 +#define B1152000 0010011 +#define B1500000 0010012 +#define B2000000 0010013 +#define B2500000 0010014 +#define B3000000 0010015 +#define B3500000 0010016 +#define B4000000 0010017 +#define CIBAUD 002003600000 /* input baud rate */ +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */ + +/* c_lflag bits */ +#define ISIG 0000001 +#define ICANON 0000002 +#define XCASE 0000004 +#define ECHO 0000010 +#define ECHOE 0000020 +#define ECHOK 0000040 +#define ECHONL 0000100 +#define NOFLSH 0000200 +#define TOSTOP 0000400 +#define ECHOCTL 0001000 +#define ECHOPRT 0002000 +#define ECHOKE 0004000 +#define FLUSHO 0010000 +#define PENDIN 0040000 +#define IEXTEN 0100000 + +/* tcflow() and TCXONC use these */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* tcflush() and TCFLSH use these */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* tcsetattr uses these */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif /* __ASM_GENERIC_TERMBITS_H */ diff --git a/include/asm-generic/termios.h b/include/asm-generic/termios.h new file mode 100644 index 000000000000..d0922adc56d4 --- /dev/null +++ b/include/asm-generic/termios.h @@ -0,0 +1,154 @@ +#ifndef _ASM_GENERIC_TERMIOS_H +#define _ASM_GENERIC_TERMIOS_H +/* + * Most architectures have straight copies of the x86 code, with + * varying levels of bug fixes on top. Usually it's a good idea + * to use this generic version instead, but be careful to avoid + * ABI changes. + * New architectures should not provide their own version. + */ + +#include +#include + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + +#ifdef __KERNEL__ + +#include + +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^U werase=^W lnext=^V + eol2=\0 +*/ +#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" + +/* + * Translate a "termio" structure into a "termios". Ugh. + */ +static inline int user_termio_to_kernel_termios(struct ktermios *termios, + const struct termio __user *termio) +{ + unsigned short tmp; + + if (get_user(tmp, &termio->c_iflag) < 0) + goto fault; + termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp; + + if (get_user(tmp, &termio->c_oflag) < 0) + goto fault; + termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp; + + if (get_user(tmp, &termio->c_cflag) < 0) + goto fault; + termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp; + + if (get_user(tmp, &termio->c_lflag) < 0) + goto fault; + termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp; + + if (get_user(termios->c_line, &termio->c_line) < 0) + goto fault; + + if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0) + goto fault; + + return 0; + + fault: + return -EFAULT; +} + +/* + * Translate a "termios" structure into a "termio". Ugh. + */ +static inline int kernel_termios_to_user_termio(struct termio __user *termio, + struct ktermios *termios) +{ + if (put_user(termios->c_iflag, &termio->c_iflag) < 0 || + put_user(termios->c_oflag, &termio->c_oflag) < 0 || + put_user(termios->c_cflag, &termio->c_cflag) < 0 || + put_user(termios->c_lflag, &termio->c_lflag) < 0 || + put_user(termios->c_line, &termio->c_line) < 0 || + copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0) + return -EFAULT; + + return 0; +} + +#ifdef TCGETS2 +static inline int user_termios_to_kernel_termios(struct ktermios *k, + struct termios2 __user *u) +{ + return copy_from_user(k, u, sizeof(struct termios2)); +} + +static inline int kernel_termios_to_user_termios(struct termios2 __user *u, + struct ktermios *k) +{ + return copy_to_user(u, k, sizeof(struct termios2)); +} + +static inline int user_termios_to_kernel_termios_1(struct ktermios *k, + struct termios __user *u) +{ + return copy_from_user(k, u, sizeof(struct termios)); +} + +static inline int kernel_termios_to_user_termios_1(struct termios __user *u, + struct ktermios *k) +{ + return copy_to_user(u, k, sizeof(struct termios)); +} +#else /* TCGETS2 */ +static inline int user_termios_to_kernel_termios(struct ktermios *k, + struct termios __user *u) +{ + return copy_from_user(k, u, sizeof(struct termios)); +} + +static inline int kernel_termios_to_user_termios(struct termios __user *u, + struct ktermios *k) +{ + return copy_to_user(u, k, sizeof(struct termios)); +} +#endif /* TCGETS2 */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_GENERIC_TERMIOS_H */ diff --git a/include/asm-generic/types.h b/include/asm-generic/types.h new file mode 100644 index 000000000000..fba7d33ca3f2 --- /dev/null +++ b/include/asm-generic/types.h @@ -0,0 +1,42 @@ +#ifndef _ASM_GENERIC_TYPES_H +#define _ASM_GENERIC_TYPES_H +/* + * int-ll64 is used practically everywhere now, + * so use it as a reasonable default. + */ +#include + +#ifndef __ASSEMBLY__ + +typedef unsigned short umode_t; + +#endif /* __ASSEMBLY__ */ + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +/* + * DMA addresses may be very different from physical addresses + * and pointers. i386 and powerpc may have 64 bit DMA on 32 bit + * systems, while sparc64 uses 32 bit DMA addresses for 64 bit + * physical addresses. + * This default defines dma_addr_t to have the same size as + * phys_addr_t, which is the most common way. + * Do not define the dma64_addr_t type, which never really + * worked. + */ +#ifndef dma_addr_t +#ifdef CONFIG_PHYS_ADDR_T_64BIT +typedef u64 dma_addr_t; +#else +typedef u32 dma_addr_t; +#endif /* CONFIG_PHYS_ADDR_T_64BIT */ +#endif /* dma_addr_t */ + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_GENERIC_TYPES_H */ diff --git a/include/asm-generic/ucontext.h b/include/asm-generic/ucontext.h new file mode 100644 index 000000000000..ad77343e8a9a --- /dev/null +++ b/include/asm-generic/ucontext.h @@ -0,0 +1,12 @@ +#ifndef __ASM_GENERIC_UCONTEXT_H +#define __ASM_GENERIC_UCONTEXT_H + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* __ASM_GENERIC_UCONTEXT_H */ -- cgit v1.2.3 From e64a1617eca39d62b248a11699de9c1195369661 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:28 +0000 Subject: asm-generic: add a generic unistd.h A new architecture should only define a minimal set of system calls while still providing the full functionality. This version of unistd.h has gone through intensive review to make sure that by default it only enables syscalls that do not already have a more featureful replacement. It is modeled after the x86-64 version of unistd.h, which unifies the syscall number definition and the actual system call table in a single file, in order to keep them synchronized much more easily. This first version still keeps legacy system call definitions around, guarded by various #ifdefs, and with numbers larger than 1024. The idea behind this is to make it easier for new architectures to transition from a full list to the reduced set. In particular, the new microblaze architecture that should migrate to using the generic ABI headers can at least use an existing uClibc source tree without major rewrites during the conversion. Signed-off-by: Arnd Bergmann --- include/asm-generic/Kbuild | 1 + include/asm-generic/unistd.h | 854 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 855 insertions(+) create mode 100644 include/asm-generic/unistd.h (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 11a78b8e2fc9..eb62334cda29 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -27,6 +27,7 @@ header-y += termbits.h header-y += termios.h header-y += types.h header-y += ucontext.h +header-y += unistd.h unifdef-y += int-l64.h unifdef-y += int-ll64.h diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h new file mode 100644 index 000000000000..5b34b6233d6d --- /dev/null +++ b/include/asm-generic/unistd.h @@ -0,0 +1,854 @@ +#if !defined(_ASM_GENERIC_UNISTD_H) || defined(__SYSCALL) +#define _ASM_GENERIC_UNISTD_H + +#include + +/* + * This file contains the system call numbers, based on the + * layout of the x86-64 architecture, which embeds the + * pointer to the syscall in the table. + * + * As a basic principle, no duplication of functionality + * should be added, e.g. we don't use lseek when llseek + * is present. New architectures should use this file + * and implement the less feature-full calls in user space. + */ + +#ifndef __SYSCALL +#define __SYSCALL(x, y) +#endif + +#if __BITS_PER_LONG == 32 +#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _32) +#else +#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _64) +#endif + +#define __NR_io_setup 0 +__SYSCALL(__NR_io_setup, sys_io_setup) +#define __NR_io_destroy 1 +__SYSCALL(__NR_io_destroy, sys_io_destroy) +#define __NR_io_submit 2 +__SYSCALL(__NR_io_submit, sys_io_submit) +#define __NR_io_cancel 3 +__SYSCALL(__NR_io_cancel, sys_io_cancel) +#define __NR_io_getevents 4 +__SYSCALL(__NR_io_getevents, sys_io_getevents) + +/* fs/xattr.c */ +#define __NR_setxattr 5 +__SYSCALL(__NR_setxattr, sys_setxattr) +#define __NR_lsetxattr 6 +__SYSCALL(__NR_lsetxattr, sys_lsetxattr) +#define __NR_fsetxattr 7 +__SYSCALL(__NR_fsetxattr, sys_fsetxattr) +#define __NR_getxattr 8 +__SYSCALL(__NR_getxattr, sys_getxattr) +#define __NR_lgetxattr 9 +__SYSCALL(__NR_lgetxattr, sys_lgetxattr) +#define __NR_fgetxattr 10 +__SYSCALL(__NR_fgetxattr, sys_fgetxattr) +#define __NR_listxattr 11 +__SYSCALL(__NR_listxattr, sys_listxattr) +#define __NR_llistxattr 12 +__SYSCALL(__NR_llistxattr, sys_llistxattr) +#define __NR_flistxattr 13 +__SYSCALL(__NR_flistxattr, sys_flistxattr) +#define __NR_removexattr 14 +__SYSCALL(__NR_removexattr, sys_removexattr) +#define __NR_lremovexattr 15 +__SYSCALL(__NR_lremovexattr, sys_lremovexattr) +#define __NR_fremovexattr 16 +__SYSCALL(__NR_fremovexattr, sys_fremovexattr) + +/* fs/dcache.c */ +#define __NR_getcwd 17 +__SYSCALL(__NR_getcwd, sys_getcwd) + +/* fs/cookies.c */ +#define __NR_lookup_dcookie 18 +__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie) + +/* fs/eventfd.c */ +#define __NR_eventfd2 19 +__SYSCALL(__NR_eventfd2, sys_eventfd2) + +/* fs/eventpoll.c */ +#define __NR_epoll_create1 20 +__SYSCALL(__NR_epoll_create1, sys_epoll_create1) +#define __NR_epoll_ctl 21 +__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) +#define __NR_epoll_pwait 22 +__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait) + +/* fs/fcntl.c */ +#define __NR_dup 23 +__SYSCALL(__NR_dup, sys_dup) +#define __NR_dup3 24 +__SYSCALL(__NR_dup3, sys_dup3) +#define __NR3264_fcntl 25 +__SC_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl) + +/* fs/inotify_user.c */ +#define __NR_inotify_init1 26 +__SYSCALL(__NR_inotify_init1, sys_inotify_init1) +#define __NR_inotify_add_watch 27 +__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) +#define __NR_inotify_rm_watch 28 +__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) + +/* fs/ioctl.c */ +#define __NR_ioctl 29 +__SYSCALL(__NR_ioctl, sys_ioctl) + +/* fs/ioprio.c */ +#define __NR_ioprio_set 30 +__SYSCALL(__NR_ioprio_set, sys_ioprio_set) +#define __NR_ioprio_get 31 +__SYSCALL(__NR_ioprio_get, sys_ioprio_get) + +/* fs/locks.c */ +#define __NR_flock 32 +__SYSCALL(__NR_flock, sys_flock) + +/* fs/namei.c */ +#define __NR_mknodat 33 +__SYSCALL(__NR_mknodat, sys_mknodat) +#define __NR_mkdirat 34 +__SYSCALL(__NR_mkdirat, sys_mkdirat) +#define __NR_unlinkat 35 +__SYSCALL(__NR_unlinkat, sys_unlinkat) +#define __NR_symlinkat 36 +__SYSCALL(__NR_symlinkat, sys_symlinkat) +#define __NR_linkat 37 +__SYSCALL(__NR_linkat, sys_linkat) +#define __NR_renameat 38 +__SYSCALL(__NR_renameat, sys_renameat) + +/* fs/namespace.c */ +#define __NR_umount2 39 +__SYSCALL(__NR_umount2, sys_umount) +#define __NR_mount 40 +__SYSCALL(__NR_mount, sys_mount) +#define __NR_pivot_root 41 +__SYSCALL(__NR_pivot_root, sys_pivot_root) + +/* fs/nfsctl.c */ +#define __NR_nfsservctl 42 +__SYSCALL(__NR_nfsservctl, sys_nfsservctl) + +/* fs/open.c */ +#define __NR3264_statfs 43 +__SC_3264(__NR3264_statfs, sys_statfs64, sys_statfs) +#define __NR3264_fstatfs 44 +__SC_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs) +#define __NR3264_truncate 45 +__SC_3264(__NR3264_truncate, sys_truncate64, sys_truncate) +#define __NR3264_ftruncate 46 +__SC_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate) + +#define __NR_fallocate 47 +__SYSCALL(__NR_fallocate, sys_fallocate) +#define __NR_faccessat 48 +__SYSCALL(__NR_faccessat, sys_faccessat) +#define __NR_chdir 49 +__SYSCALL(__NR_chdir, sys_chdir) +#define __NR_fchdir 50 +__SYSCALL(__NR_fchdir, sys_fchdir) +#define __NR_chroot 51 +__SYSCALL(__NR_chroot, sys_chroot) +#define __NR_fchmod 52 +__SYSCALL(__NR_fchmod, sys_fchmod) +#define __NR_fchmodat 53 +__SYSCALL(__NR_fchmodat, sys_fchmodat) +#define __NR_fchownat 54 +__SYSCALL(__NR_fchownat, sys_fchownat) +#define __NR_fchown 55 +__SYSCALL(__NR_fchown, sys_fchown) +#define __NR_openat 56 +__SYSCALL(__NR_openat, sys_openat) +#define __NR_close 57 +__SYSCALL(__NR_close, sys_close) +#define __NR_vhangup 58 +__SYSCALL(__NR_vhangup, sys_vhangup) + +/* fs/pipe.c */ +#define __NR_pipe2 59 +__SYSCALL(__NR_pipe2, sys_pipe2) + +/* fs/quota.c */ +#define __NR_quotactl 60 +__SYSCALL(__NR_quotactl, sys_quotactl) + +/* fs/readdir.c */ +#define __NR_getdents64 61 +__SYSCALL(__NR_getdents64, sys_getdents64) + +/* fs/read_write.c */ +#define __NR3264_lseek 62 +__SC_3264(__NR3264_lseek, sys_llseek, sys_lseek) +#define __NR_read 63 +__SYSCALL(__NR_read, sys_read) +#define __NR_write 64 +__SYSCALL(__NR_write, sys_write) +#define __NR_readv 65 +__SYSCALL(__NR_readv, sys_readv) +#define __NR_writev 66 +__SYSCALL(__NR_writev, sys_writev) +#define __NR_pread64 67 +__SYSCALL(__NR_pread64, sys_pread64) +#define __NR_pwrite64 68 +__SYSCALL(__NR_pwrite64, sys_pwrite64) +#define __NR_preadv 69 +__SYSCALL(__NR_preadv, sys_preadv) +#define __NR_pwritev 70 +__SYSCALL(__NR_pwritev, sys_pwritev) + +/* fs/sendfile.c */ +#define __NR3264_sendfile 71 +__SC_3264(__NR3264_sendfile, sys_sendfile64, sys_sendfile) + +/* fs/select.c */ +#define __NR_pselect6 72 +__SYSCALL(__NR_pselect6, sys_pselect6) +#define __NR_ppoll 73 +__SYSCALL(__NR_ppoll, sys_ppoll) + +/* fs/signalfd.c */ +#define __NR_signalfd4 74 +__SYSCALL(__NR_signalfd4, sys_signalfd4) + +/* fs/splice.c */ +#define __NR_vmsplice 75 +__SYSCALL(__NR_vmsplice, sys_vmsplice) +#define __NR_splice 76 +__SYSCALL(__NR_splice, sys_splice) +#define __NR_tee 77 +__SYSCALL(__NR_tee, sys_tee) + +/* fs/stat.c */ +#define __NR_readlinkat 78 +__SYSCALL(__NR_readlinkat, sys_readlinkat) +#define __NR3264_fstatat 79 +__SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat) +#define __NR3264_fstat 80 +__SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat) + +/* fs/sync.c */ +#define __NR_sync 81 +__SYSCALL(__NR_sync, sys_sync) +#define __NR_fsync 82 +__SYSCALL(__NR_fsync, sys_fsync) +#define __NR_fdatasync 83 +__SYSCALL(__NR_fdatasync, sys_fdatasync) +#define __NR_sync_file_range 84 +__SYSCALL(__NR_sync_file_range, sys_sync_file_range) /* .long sys_sync_file_range2, */ + +/* fs/timerfd.c */ +#define __NR_timerfd_create 85 +__SYSCALL(__NR_timerfd_create, sys_timerfd_create) +#define __NR_timerfd_settime 86 +__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) +#define __NR_timerfd_gettime 87 +__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) + +/* fs/utimes.c */ +#define __NR_utimensat 88 +__SYSCALL(__NR_utimensat, sys_utimensat) + +/* kernel/acct.c */ +#define __NR_acct 89 +__SYSCALL(__NR_acct, sys_acct) + +/* kernel/capability.c */ +#define __NR_capget 90 +__SYSCALL(__NR_capget, sys_capget) +#define __NR_capset 91 +__SYSCALL(__NR_capset, sys_capset) + +/* kernel/exec_domain.c */ +#define __NR_personality 92 +__SYSCALL(__NR_personality, sys_personality) + +/* kernel/exit.c */ +#define __NR_exit 93 +__SYSCALL(__NR_exit, sys_exit) +#define __NR_exit_group 94 +__SYSCALL(__NR_exit_group, sys_exit_group) +#define __NR_waitid 95 +__SYSCALL(__NR_waitid, sys_waitid) + +/* kernel/fork.c */ +#define __NR_set_tid_address 96 +__SYSCALL(__NR_set_tid_address, sys_set_tid_address) +#define __NR_unshare 97 +__SYSCALL(__NR_unshare, sys_unshare) + +/* kernel/futex.c */ +#define __NR_futex 98 +__SYSCALL(__NR_futex, sys_futex) +#define __NR_set_robust_list 99 +__SYSCALL(__NR_set_robust_list, sys_set_robust_list) +#define __NR_get_robust_list 100 +__SYSCALL(__NR_get_robust_list, sys_get_robust_list) + +/* kernel/hrtimer.c */ +#define __NR_nanosleep 101 +__SYSCALL(__NR_nanosleep, sys_nanosleep) + +/* kernel/itimer.c */ +#define __NR_getitimer 102 +__SYSCALL(__NR_getitimer, sys_getitimer) +#define __NR_setitimer 103 +__SYSCALL(__NR_setitimer, sys_setitimer) + +/* kernel/kexec.c */ +#define __NR_kexec_load 104 +__SYSCALL(__NR_kexec_load, sys_kexec_load) + +/* kernel/module.c */ +#define __NR_init_module 105 +__SYSCALL(__NR_init_module, sys_init_module) +#define __NR_delete_module 106 +__SYSCALL(__NR_delete_module, sys_delete_module) + +/* kernel/posix-timers.c */ +#define __NR_timer_create 107 +__SYSCALL(__NR_timer_create, sys_timer_create) +#define __NR_timer_gettime 108 +__SYSCALL(__NR_timer_gettime, sys_timer_gettime) +#define __NR_timer_getoverrun 109 +__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) +#define __NR_timer_settime 110 +__SYSCALL(__NR_timer_settime, sys_timer_settime) +#define __NR_timer_delete 111 +__SYSCALL(__NR_timer_delete, sys_timer_delete) +#define __NR_clock_settime 112 +__SYSCALL(__NR_clock_settime, sys_clock_settime) +#define __NR_clock_gettime 113 +__SYSCALL(__NR_clock_gettime, sys_clock_gettime) +#define __NR_clock_getres 114 +__SYSCALL(__NR_clock_getres, sys_clock_getres) +#define __NR_clock_nanosleep 115 +__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep) + +/* kernel/printk.c */ +#define __NR_syslog 116 +__SYSCALL(__NR_syslog, sys_syslog) + +/* kernel/ptrace.c */ +#define __NR_ptrace 117 +__SYSCALL(__NR_ptrace, sys_ptrace) + +/* kernel/sched.c */ +#define __NR_sched_setparam 118 +__SYSCALL(__NR_sched_setparam, sys_sched_setparam) +#define __NR_sched_setscheduler 119 +__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) +#define __NR_sched_getscheduler 120 +__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) +#define __NR_sched_getparam 121 +__SYSCALL(__NR_sched_getparam, sys_sched_getparam) +#define __NR_sched_setaffinity 122 +__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity) +#define __NR_sched_getaffinity 123 +__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity) +#define __NR_sched_yield 124 +__SYSCALL(__NR_sched_yield, sys_sched_yield) +#define __NR_sched_get_priority_max 125 +__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) +#define __NR_sched_get_priority_min 126 +__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) +#define __NR_sched_rr_get_interval 127 +__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval) + +/* kernel/signal.c */ +#define __NR_restart_syscall 128 +__SYSCALL(__NR_restart_syscall, sys_restart_syscall) +#define __NR_kill 129 +__SYSCALL(__NR_kill, sys_kill) +#define __NR_tkill 130 +__SYSCALL(__NR_tkill, sys_tkill) +#define __NR_tgkill 131 +__SYSCALL(__NR_tgkill, sys_tgkill) +#define __NR_sigaltstack 132 +__SYSCALL(__NR_sigaltstack, sys_sigaltstack) +#define __NR_rt_sigsuspend 133 +__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) /* __ARCH_WANT_SYS_RT_SIGSUSPEND */ +#define __NR_rt_sigaction 134 +__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) /* __ARCH_WANT_SYS_RT_SIGACTION */ +#define __NR_rt_sigprocmask 135 +__SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask) +#define __NR_rt_sigpending 136 +__SYSCALL(__NR_rt_sigpending, sys_rt_sigpending) +#define __NR_rt_sigtimedwait 137 +__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait) +#define __NR_rt_sigqueueinfo 138 +__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo) +#define __NR_rt_sigreturn 139 +__SYSCALL(__NR_rt_sigreturn, sys_rt_sigreturn) /* sys_rt_sigreturn_wrapper, */ + +/* kernel/sys.c */ +#define __NR_setpriority 140 +__SYSCALL(__NR_setpriority, sys_setpriority) +#define __NR_getpriority 141 +__SYSCALL(__NR_getpriority, sys_getpriority) +#define __NR_reboot 142 +__SYSCALL(__NR_reboot, sys_reboot) +#define __NR_setregid 143 +__SYSCALL(__NR_setregid, sys_setregid) +#define __NR_setgid 144 +__SYSCALL(__NR_setgid, sys_setgid) +#define __NR_setreuid 145 +__SYSCALL(__NR_setreuid, sys_setreuid) +#define __NR_setuid 146 +__SYSCALL(__NR_setuid, sys_setuid) +#define __NR_setresuid 147 +__SYSCALL(__NR_setresuid, sys_setresuid) +#define __NR_getresuid 148 +__SYSCALL(__NR_getresuid, sys_getresuid) +#define __NR_setresgid 149 +__SYSCALL(__NR_setresgid, sys_setresgid) +#define __NR_getresgid 150 +__SYSCALL(__NR_getresgid, sys_getresgid) +#define __NR_setfsuid 151 +__SYSCALL(__NR_setfsuid, sys_setfsuid) +#define __NR_setfsgid 152 +__SYSCALL(__NR_setfsgid, sys_setfsgid) +#define __NR_times 153 +__SYSCALL(__NR_times, sys_times) +#define __NR_setpgid 154 +__SYSCALL(__NR_setpgid, sys_setpgid) +#define __NR_getpgid 155 +__SYSCALL(__NR_getpgid, sys_getpgid) +#define __NR_getsid 156 +__SYSCALL(__NR_getsid, sys_getsid) +#define __NR_setsid 157 +__SYSCALL(__NR_setsid, sys_setsid) +#define __NR_getgroups 158 +__SYSCALL(__NR_getgroups, sys_getgroups) +#define __NR_setgroups 159 +__SYSCALL(__NR_setgroups, sys_setgroups) +#define __NR_uname 160 +__SYSCALL(__NR_uname, sys_newuname) +#define __NR_sethostname 161 +__SYSCALL(__NR_sethostname, sys_sethostname) +#define __NR_setdomainname 162 +__SYSCALL(__NR_setdomainname, sys_setdomainname) +#define __NR_getrlimit 163 +__SYSCALL(__NR_getrlimit, sys_getrlimit) +#define __NR_setrlimit 164 +__SYSCALL(__NR_setrlimit, sys_setrlimit) +#define __NR_getrusage 165 +__SYSCALL(__NR_getrusage, sys_getrusage) +#define __NR_umask 166 +__SYSCALL(__NR_umask, sys_umask) +#define __NR_prctl 167 +__SYSCALL(__NR_prctl, sys_prctl) +#define __NR_getcpu 168 +__SYSCALL(__NR_getcpu, sys_getcpu) + +/* kernel/time.c */ +#define __NR_gettimeofday 169 +__SYSCALL(__NR_gettimeofday, sys_gettimeofday) +#define __NR_settimeofday 170 +__SYSCALL(__NR_settimeofday, sys_settimeofday) +#define __NR_adjtimex 171 +__SYSCALL(__NR_adjtimex, sys_adjtimex) + +/* kernel/timer.c */ +#define __NR_getpid 172 +__SYSCALL(__NR_getpid, sys_getpid) +#define __NR_getppid 173 +__SYSCALL(__NR_getppid, sys_getppid) +#define __NR_getuid 174 +__SYSCALL(__NR_getuid, sys_getuid) +#define __NR_geteuid 175 +__SYSCALL(__NR_geteuid, sys_geteuid) +#define __NR_getgid 176 +__SYSCALL(__NR_getgid, sys_getgid) +#define __NR_getegid 177 +__SYSCALL(__NR_getegid, sys_getegid) +#define __NR_gettid 178 +__SYSCALL(__NR_gettid, sys_gettid) +#define __NR_sysinfo 179 +__SYSCALL(__NR_sysinfo, sys_sysinfo) + +/* ipc/mqueue.c */ +#define __NR_mq_open 180 +__SYSCALL(__NR_mq_open, sys_mq_open) +#define __NR_mq_unlink 181 +__SYSCALL(__NR_mq_unlink, sys_mq_unlink) +#define __NR_mq_timedsend 182 +__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend) +#define __NR_mq_timedreceive 183 +__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive) +#define __NR_mq_notify 184 +__SYSCALL(__NR_mq_notify, sys_mq_notify) +#define __NR_mq_getsetattr 185 +__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) + +/* ipc/msg.c */ +#define __NR_msgget 186 +__SYSCALL(__NR_msgget, sys_msgget) +#define __NR_msgctl 187 +__SYSCALL(__NR_msgctl, sys_msgctl) +#define __NR_msgrcv 188 +__SYSCALL(__NR_msgrcv, sys_msgrcv) +#define __NR_msgsnd 189 +__SYSCALL(__NR_msgsnd, sys_msgsnd) + +/* ipc/sem.c */ +#define __NR_semget 190 +__SYSCALL(__NR_semget, sys_semget) +#define __NR_semctl 191 +__SYSCALL(__NR_semctl, sys_semctl) +#define __NR_semtimedop 192 +__SYSCALL(__NR_semtimedop, sys_semtimedop) +#define __NR_semop 193 +__SYSCALL(__NR_semop, sys_semop) + +/* ipc/shm.c */ +#define __NR_shmget 194 +__SYSCALL(__NR_shmget, sys_shmget) +#define __NR_shmctl 195 +__SYSCALL(__NR_shmctl, sys_shmctl) +#define __NR_shmat 196 +__SYSCALL(__NR_shmat, sys_shmat) +#define __NR_shmdt 197 +__SYSCALL(__NR_shmdt, sys_shmdt) + +/* net/socket.c */ +#define __NR_socket 198 +__SYSCALL(__NR_socket, sys_socket) +#define __NR_socketpair 199 +__SYSCALL(__NR_socketpair, sys_socketpair) +#define __NR_bind 200 +__SYSCALL(__NR_bind, sys_bind) +#define __NR_listen 201 +__SYSCALL(__NR_listen, sys_listen) +#define __NR_accept 202 +__SYSCALL(__NR_accept, sys_accept) +#define __NR_connect 203 +__SYSCALL(__NR_connect, sys_connect) +#define __NR_getsockname 204 +__SYSCALL(__NR_getsockname, sys_getsockname) +#define __NR_getpeername 205 +__SYSCALL(__NR_getpeername, sys_getpeername) +#define __NR_sendto 206 +__SYSCALL(__NR_sendto, sys_sendto) +#define __NR_recvfrom 207 +__SYSCALL(__NR_recvfrom, sys_recvfrom) +#define __NR_setsockopt 208 +__SYSCALL(__NR_setsockopt, sys_setsockopt) +#define __NR_getsockopt 209 +__SYSCALL(__NR_getsockopt, sys_getsockopt) +#define __NR_shutdown 210 +__SYSCALL(__NR_shutdown, sys_shutdown) +#define __NR_sendmsg 211 +__SYSCALL(__NR_sendmsg, sys_sendmsg) +#define __NR_recvmsg 212 +__SYSCALL(__NR_recvmsg, sys_recvmsg) + +/* mm/filemap.c */ +#define __NR_readahead 213 +__SYSCALL(__NR_readahead, sys_readahead) + +/* mm/nommu.c, also with MMU */ +#define __NR_brk 214 +__SYSCALL(__NR_brk, sys_brk) +#define __NR_munmap 215 +__SYSCALL(__NR_munmap, sys_munmap) +#define __NR_mremap 216 +__SYSCALL(__NR_mremap, sys_mremap) + +/* security/keys/keyctl.c */ +#define __NR_add_key 217 +__SYSCALL(__NR_add_key, sys_add_key) +#define __NR_request_key 218 +__SYSCALL(__NR_request_key, sys_request_key) +#define __NR_keyctl 219 +__SYSCALL(__NR_keyctl, sys_keyctl) + +/* arch/example/kernel/sys_example.c */ +#define __NR_clone 220 +__SYSCALL(__NR_clone, sys_clone) /* .long sys_clone_wrapper */ +#define __NR_execve 221 +__SYSCALL(__NR_execve, sys_execve) /* .long sys_execve_wrapper */ + +#define __NR3264_mmap 222 +__SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap) +/* mm/fadvise.c */ +#define __NR3264_fadvise64 223 +__SC_3264(__NR3264_fadvise64, sys_fadvise64_64, sys_fadvise64) + +/* mm/, CONFIG_MMU only */ +#ifndef __ARCH_NOMMU +#define __NR_swapon 224 +__SYSCALL(__NR_swapon, sys_swapon) +#define __NR_swapoff 225 +__SYSCALL(__NR_swapoff, sys_swapoff) +#define __NR_mprotect 226 +__SYSCALL(__NR_mprotect, sys_mprotect) +#define __NR_msync 227 +__SYSCALL(__NR_msync, sys_msync) +#define __NR_mlock 228 +__SYSCALL(__NR_mlock, sys_mlock) +#define __NR_munlock 229 +__SYSCALL(__NR_munlock, sys_munlock) +#define __NR_mlockall 230 +__SYSCALL(__NR_mlockall, sys_mlockall) +#define __NR_munlockall 231 +__SYSCALL(__NR_munlockall, sys_munlockall) +#define __NR_mincore 232 +__SYSCALL(__NR_mincore, sys_mincore) +#define __NR_madvise 233 +__SYSCALL(__NR_madvise, sys_madvise) +#define __NR_remap_file_pages 234 +__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) +#define __NR_mbind 235 +__SYSCALL(__NR_mbind, sys_mbind) +#define __NR_get_mempolicy 236 +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) +#define __NR_set_mempolicy 237 +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) +#define __NR_migrate_pages 238 +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) +#define __NR_move_pages 239 +__SYSCALL(__NR_move_pages, sys_move_pages) +#endif + +#undef __NR_syscalls +#define __NR_syscalls 240 + +/* + * All syscalls below here should go away really, + * these are provided for both review and as a porting + * help for the C library version. +* + * Last chance: are any of these important enought to + * enable by default? + */ +#ifdef __ARCH_WANT_SYSCALL_NO_AT +#define __NR_open 1024 +__SYSCALL(__NR_open, sys_open) +#define __NR_link 1025 +__SYSCALL(__NR_link, sys_link) +#define __NR_unlink 1026 +__SYSCALL(__NR_unlink, sys_unlink) +#define __NR_mknod 1027 +__SYSCALL(__NR_mknod, sys_mknod) +#define __NR_chmod 1028 +__SYSCALL(__NR_chmod, sys_chmod) +#define __NR_chown 1029 +__SYSCALL(__NR_chown, sys_chown) +#define __NR_mkdir 1030 +__SYSCALL(__NR_mkdir, sys_mkdir) +#define __NR_rmdir 1031 +__SYSCALL(__NR_rmdir, sys_rmdir) +#define __NR_lchown 1032 +__SYSCALL(__NR_lchown, sys_lchown) +#define __NR_access 1033 +__SYSCALL(__NR_access, sys_access) +#define __NR_rename 1034 +__SYSCALL(__NR_rename, sys_rename) +#define __NR_readlink 1035 +__SYSCALL(__NR_readlink, sys_readlink) +#define __NR_symlink 1036 +__SYSCALL(__NR_symlink, sys_symlink) +#define __NR_utimes 1037 +__SYSCALL(__NR_utimes, sys_utimes) +#define __NR3264_stat 1038 +__SC_3264(__NR3264_stat, sys_stat64, sys_newstat) +#define __NR3264_lstat 1039 +__SC_3264(__NR3264_lstat, sys_lstat64, sys_newlstat) + +#undef __NR_syscalls +#define __NR_syscalls (__NR3264_lstat+1) +#endif /* __ARCH_WANT_SYSCALL_NO_AT */ + +#ifdef __ARCH_WANT_SYSCALL_NO_FLAGS +#define __NR_pipe 1040 +__SYSCALL(__NR_pipe, sys_pipe) +#define __NR_dup2 1041 +__SYSCALL(__NR_dup2, sys_dup2) +#define __NR_epoll_create 1042 +__SYSCALL(__NR_epoll_create, sys_epoll_create) +#define __NR_inotify_init 1043 +__SYSCALL(__NR_inotify_init, sys_inotify_init) +#define __NR_eventfd 1044 +__SYSCALL(__NR_eventfd, sys_eventfd) +#define __NR_signalfd 1045 +__SYSCALL(__NR_signalfd, sys_signalfd) + +#undef __NR_syscalls +#define __NR_syscalls (__NR_signalfd+1) +#endif /* __ARCH_WANT_SYSCALL_NO_FLAGS */ + +#if __BITS_PER_LONG == 32 && defined(__ARCH_WANT_SYSCALL_OFF_T) +#define __NR_sendfile 1046 +__SYSCALL(__NR_sendfile, sys_sendfile) +#define __NR_ftruncate 1047 +__SYSCALL(__NR_ftruncate, sys_ftruncate) +#define __NR_truncate 1048 +__SYSCALL(__NR_truncate, sys_truncate) +#define __NR_stat 1049 +__SYSCALL(__NR_stat, sys_newstat) +#define __NR_lstat 1050 +__SYSCALL(__NR_lstat, sys_newlstat) +#define __NR_fstat 1051 +__SYSCALL(__NR_fstat, sys_newfstat) +#define __NR_fcntl 1052 +__SYSCALL(__NR_fcntl, sys_fcntl) +#define __NR_fadvise64 1053 +#define __ARCH_WANT_SYS_FADVISE64 +__SYSCALL(__NR_fadvise64, sys_fadvise64) +#define __NR_newfstatat 1054 +#define __ARCH_WANT_SYS_NEWFSTATAT +__SYSCALL(__NR_newfstatat, sys_newfstatat) +#define __NR_fstatfs 1055 +__SYSCALL(__NR_fstatfs, sys_fstatfs) +#define __NR_statfs 1056 +__SYSCALL(__NR_statfs, sys_statfs) +#define __NR_lseek 1057 +__SYSCALL(__NR_lseek, sys_lseek) +#define __NR_mmap 1058 +__SYSCALL(__NR_mmap, sys_mmap) + +#undef __NR_syscalls +#define __NR_syscalls (__NR_mmap+1) +#endif /* 32 bit off_t syscalls */ + +#ifdef __ARCH_WANT_SYSCALL_DEPRECATED +#define __NR_alarm 1059 +#define __ARCH_WANT_SYS_ALARM +__SYSCALL(__NR_alarm, sys_alarm) +#define __NR_getpgrp 1060 +#define __ARCH_WANT_SYS_GETPGRP +__SYSCALL(__NR_getpgrp, sys_getpgrp) +#define __NR_pause 1061 +#define __ARCH_WANT_SYS_PAUSE +__SYSCALL(__NR_pause, sys_pause) +#define __NR_time 1062 +#define __ARCH_WANT_SYS_TIME +__SYSCALL(__NR_time, sys_time) +#define __NR_utime 1063 +#define __ARCH_WANT_SYS_UTIME +__SYSCALL(__NR_utime, sys_utime) + +#define __NR_creat 1064 +__SYSCALL(__NR_creat, sys_creat) +#define __NR_getdents 1065 +#define __ARCH_WANT_SYS_GETDENTS +__SYSCALL(__NR_getdents, sys_getdents) +#define __NR_futimesat 1066 +__SYSCALL(__NR_futimesat, sys_futimesat) +#define __NR_select 1067 +#define __ARCH_WANT_SYS_SELECT +__SYSCALL(__NR_select, sys_select) +#define __NR_poll 1068 +__SYSCALL(__NR_poll, sys_poll) +#define __NR_epoll_wait 1069 +__SYSCALL(__NR_epoll_wait, sys_epoll_wait) +#define __NR_ustat 1070 +__SYSCALL(__NR_ustat, sys_ustat) +#define __NR_vfork 1071 +__SYSCALL(__NR_vfork, sys_vfork) +#define __NR_wait4 1072 +__SYSCALL(__NR_wait4, sys_wait4) +#define __NR_recv 1073 +__SYSCALL(__NR_recv, sys_recv) +#define __NR_send 1074 +__SYSCALL(__NR_send, sys_send) +#define __NR_bdflush 1075 +__SYSCALL(__NR_bdflush, sys_bdflush) +#define __NR_umount 1076 +__SYSCALL(__NR_umount, sys_oldumount) +#define __ARCH_WANT_SYS_OLDUMOUNT +#define __NR_uselib 1077 +__SYSCALL(__NR_uselib, sys_uselib) +#define __NR__sysctl 1078 +__SYSCALL(__NR__sysctl, sys_sysctl) + +#define __NR_fork 1079 +#ifdef CONFIG_MMU +__SYSCALL(__NR_fork, sys_fork) +#else +__SYSCALL(__NR_fork, sys_ni_syscall) +#endif /* CONFIG_MMU */ + +#undef __NR_syscalls +#define __NR_syscalls (__NR_fork+1) + +#endif /* __ARCH_WANT_SYSCALL_DEPRECATED */ + +/* + * 32 bit systems traditionally used different + * syscalls for off_t and loff_t arguments, while + * 64 bit systems only need the off_t version. + * For new 32 bit platforms, there is no need to + * implement the old 32 bit off_t syscalls, so + * they take different names. + * Here we map the numbers so that both versions + * use the same syscall table layout. + */ +#if __BITS_PER_LONG == 64 +#define __NR_fcntl __NR3264_fcntl +#define __NR_statfs __NR3264_statfs +#define __NR_fstatfs __NR3264_fstatfs +#define __NR_truncate __NR3264_truncate +#define __NR_ftruncate __NR3264_truncate +#define __NR_lseek __NR3264_lseek +#define __NR_sendfile __NR3264_sendfile +#define __NR_newfstatat __NR3264_fstatat +#define __NR_fstat __NR3264_fstat +#define __NR_mmap __NR3264_mmap +#define __NR_fadvise64 __NR3264_fadvise64 +#ifdef __NR3264_stat +#define __NR_stat __NR3264_stat +#define __NR_lstat __NR3264_lstat +#endif +#else +#define __NR_fcntl64 __NR3264_fcntl +#define __NR_statfs64 __NR3264_statfs +#define __NR_fstatfs64 __NR3264_fstatfs +#define __NR_truncate64 __NR3264_truncate +#define __NR_ftruncate64 __NR3264_truncate +#define __NR_llseek __NR3264_lseek +#define __NR_sendfile64 __NR3264_sendfile +#define __NR_fstatat64 __NR3264_fstatat +#define __NR_fstat64 __NR3264_fstat +#define __NR_mmap2 __NR3264_mmap +#define __NR_fadvise64_64 __NR3264_fadvise64 +#ifdef __NR3264_stat +#define __NR_stat64 __NR3264_stat +#define __NR_lstat64 __NR3264_lstat +#endif +#endif + +#ifdef __KERNEL__ + +/* + * These are required system calls, we should + * invert the logic eventually and let them + * be selected by default. + */ +#if __BITS_PER_LONG == 32 +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SYS_LLSEEK +#endif +#define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND + +/* + * "Conditional" syscalls + * + * What we want is __attribute__((weak,alias("sys_ni_syscall"))), + * but it doesn't work on all toolchains, so we just do it by hand + */ +#ifndef cond_syscall +#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") +#endif + +#endif /* __KERNEL__ */ +#endif /* _ASM_GENERIC_UNISTD_H */ -- cgit v1.2.3 From 72099ed2719fc5829bd79c6ca9d1783ed026eb37 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:29 +0000 Subject: asm-generic: rename atomic.h to atomic-long.h The existing asm-generic/atomic.h only defines the atomic_long type. This renames it to atomic-long.h so we have a place to add a truly generic atomic.h that can be used on all non-SMP systems. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann Acked-by: Ingo Molnar --- include/asm-generic/atomic-long.h | 258 ++++++++++++++++++++++++++++++++++++ include/asm-generic/atomic.h | 258 ------------------------------------ include/asm-generic/bitops/atomic.h | 1 + 3 files changed, 259 insertions(+), 258 deletions(-) create mode 100644 include/asm-generic/atomic-long.h delete mode 100644 include/asm-generic/atomic.h (limited to 'include') diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h new file mode 100644 index 000000000000..76e27d66c055 --- /dev/null +++ b/include/asm-generic/atomic-long.h @@ -0,0 +1,258 @@ +#ifndef _ASM_GENERIC_ATOMIC_LONG_H +#define _ASM_GENERIC_ATOMIC_LONG_H +/* + * Copyright (C) 2005 Silicon Graphics, Inc. + * Christoph Lameter + * + * Allows to provide arch independent atomic definitions without the need to + * edit all arch specific atomic.h files. + */ + +#include + +/* + * Suppport for atomic_long_t + * + * Casts for parameters are avoided for existing atomic functions in order to + * avoid issues with cast-as-lval under gcc 4.x and other limitations that the + * macros of a platform may have. + */ + +#if BITS_PER_LONG == 64 + +typedef atomic64_t atomic_long_t; + +#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i) + +static inline long atomic_long_read(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_read(v); +} + +static inline void atomic_long_set(atomic_long_t *l, long i) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_set(v, i); +} + +static inline void atomic_long_inc(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_inc(v); +} + +static inline void atomic_long_dec(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_dec(v); +} + +static inline void atomic_long_add(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_add(i, v); +} + +static inline void atomic_long_sub(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_sub(i, v); +} + +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_add_return(i, v); +} + +static inline long atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_dec_return(v); +} + +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_add_unless(v, a, u); +} + +#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l)) + +#define atomic_long_cmpxchg(l, old, new) \ + (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) +#define atomic_long_xchg(v, new) \ + (atomic64_xchg((atomic64_t *)(l), (new))) + +#else /* BITS_PER_LONG == 64 */ + +typedef atomic_t atomic_long_t; + +#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i) +static inline long atomic_long_read(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_read(v); +} + +static inline void atomic_long_set(atomic_long_t *l, long i) +{ + atomic_t *v = (atomic_t *)l; + + atomic_set(v, i); +} + +static inline void atomic_long_inc(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_inc(v); +} + +static inline void atomic_long_dec(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_dec(v); +} + +static inline void atomic_long_add(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_add(i, v); +} + +static inline void atomic_long_sub(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_sub(i, v); +} + +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_add_return(i, v); +} + +static inline long atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_dec_return(v); +} + +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_add_unless(v, a, u); +} + +#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l)) + +#define atomic_long_cmpxchg(l, old, new) \ + (atomic_cmpxchg((atomic_t *)(l), (old), (new))) +#define atomic_long_xchg(v, new) \ + (atomic_xchg((atomic_t *)(v), (new))) + +#endif /* BITS_PER_LONG == 64 */ + +#endif /* _ASM_GENERIC_ATOMIC_LONG_H */ diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h deleted file mode 100644 index 3673a13b6703..000000000000 --- a/include/asm-generic/atomic.h +++ /dev/null @@ -1,258 +0,0 @@ -#ifndef _ASM_GENERIC_ATOMIC_H -#define _ASM_GENERIC_ATOMIC_H -/* - * Copyright (C) 2005 Silicon Graphics, Inc. - * Christoph Lameter - * - * Allows to provide arch independent atomic definitions without the need to - * edit all arch specific atomic.h files. - */ - -#include - -/* - * Suppport for atomic_long_t - * - * Casts for parameters are avoided for existing atomic functions in order to - * avoid issues with cast-as-lval under gcc 4.x and other limitations that the - * macros of a platform may have. - */ - -#if BITS_PER_LONG == 64 - -typedef atomic64_t atomic_long_t; - -#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i) - -static inline long atomic_long_read(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_read(v); -} - -static inline void atomic_long_set(atomic_long_t *l, long i) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_set(v, i); -} - -static inline void atomic_long_inc(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_inc(v); -} - -static inline void atomic_long_dec(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_dec(v); -} - -static inline void atomic_long_add(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_add(i, v); -} - -static inline void atomic_long_sub(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_sub(i, v); -} - -static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_sub_and_test(i, v); -} - -static inline int atomic_long_dec_and_test(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_dec_and_test(v); -} - -static inline int atomic_long_inc_and_test(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_inc_and_test(v); -} - -static inline int atomic_long_add_negative(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_add_negative(i, v); -} - -static inline long atomic_long_add_return(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_add_return(i, v); -} - -static inline long atomic_long_sub_return(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_sub_return(i, v); -} - -static inline long atomic_long_inc_return(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_inc_return(v); -} - -static inline long atomic_long_dec_return(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_dec_return(v); -} - -static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_add_unless(v, a, u); -} - -#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l)) - -#define atomic_long_cmpxchg(l, old, new) \ - (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) -#define atomic_long_xchg(v, new) \ - (atomic64_xchg((atomic64_t *)(l), (new))) - -#else /* BITS_PER_LONG == 64 */ - -typedef atomic_t atomic_long_t; - -#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i) -static inline long atomic_long_read(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_read(v); -} - -static inline void atomic_long_set(atomic_long_t *l, long i) -{ - atomic_t *v = (atomic_t *)l; - - atomic_set(v, i); -} - -static inline void atomic_long_inc(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - atomic_inc(v); -} - -static inline void atomic_long_dec(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - atomic_dec(v); -} - -static inline void atomic_long_add(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - atomic_add(i, v); -} - -static inline void atomic_long_sub(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - atomic_sub(i, v); -} - -static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return atomic_sub_and_test(i, v); -} - -static inline int atomic_long_dec_and_test(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return atomic_dec_and_test(v); -} - -static inline int atomic_long_inc_and_test(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return atomic_inc_and_test(v); -} - -static inline int atomic_long_add_negative(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return atomic_add_negative(i, v); -} - -static inline long atomic_long_add_return(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_add_return(i, v); -} - -static inline long atomic_long_sub_return(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_sub_return(i, v); -} - -static inline long atomic_long_inc_return(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_inc_return(v); -} - -static inline long atomic_long_dec_return(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_dec_return(v); -} - -static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_add_unless(v, a, u); -} - -#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l)) - -#define atomic_long_cmpxchg(l, old, new) \ - (atomic_cmpxchg((atomic_t *)(l), (old), (new))) -#define atomic_long_xchg(v, new) \ - (atomic_xchg((atomic_t *)(v), (new))) - -#endif /* BITS_PER_LONG == 64 */ - -#endif /* _ASM_GENERIC_ATOMIC_H */ diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index 4657f3e410fc..c8946465e63a 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -2,6 +2,7 @@ #define _ASM_GENERIC_BITOPS_ATOMIC_H_ #include +#include #ifdef CONFIG_SMP #include -- cgit v1.2.3 From 5b17e1cd8928ae65932758ce6478ac6d3e9a86b2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:30 +0000 Subject: asm-generic: rename page.h and uaccess.h The current asm-generic/page.h only contains the get_order function, and asm-generic/uaccess.h only implements unaligned accesses. This renames the file to getorder.h and uaccess-unaligned.h to make room for new page.h and uaccess.h file that will be usable by all simple (e.g. nommu) architectures. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/getorder.h | 24 ++++++++++++++++++++++++ include/asm-generic/page.h | 24 ------------------------ include/asm-generic/uaccess-unaligned.h | 26 ++++++++++++++++++++++++++ include/asm-generic/uaccess.h | 26 -------------------------- 4 files changed, 50 insertions(+), 50 deletions(-) create mode 100644 include/asm-generic/getorder.h delete mode 100644 include/asm-generic/page.h create mode 100644 include/asm-generic/uaccess-unaligned.h delete mode 100644 include/asm-generic/uaccess.h (limited to 'include') diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h new file mode 100644 index 000000000000..67e7245dc9b3 --- /dev/null +++ b/include/asm-generic/getorder.h @@ -0,0 +1,24 @@ +#ifndef __ASM_GENERIC_GETORDER_H +#define __ASM_GENERIC_GETORDER_H + +#ifndef __ASSEMBLY__ + +#include + +/* Pure 2^n version of get_order */ +static inline __attribute_const__ int get_order(unsigned long size) +{ + int order; + + size = (size - 1) >> (PAGE_SHIFT - 1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_GENERIC_GETORDER_H */ diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h deleted file mode 100644 index 14db733b8e68..000000000000 --- a/include/asm-generic/page.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _ASM_GENERIC_PAGE_H -#define _ASM_GENERIC_PAGE_H - -#ifndef __ASSEMBLY__ - -#include - -/* Pure 2^n version of get_order */ -static __inline__ __attribute_const__ int get_order(unsigned long size) -{ - int order; - - size = (size - 1) >> (PAGE_SHIFT - 1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_GENERIC_PAGE_H */ diff --git a/include/asm-generic/uaccess-unaligned.h b/include/asm-generic/uaccess-unaligned.h new file mode 100644 index 000000000000..67deb898f0c5 --- /dev/null +++ b/include/asm-generic/uaccess-unaligned.h @@ -0,0 +1,26 @@ +#ifndef __ASM_GENERIC_UACCESS_UNALIGNED_H +#define __ASM_GENERIC_UACCESS_UNALIGNED_H + +/* + * This macro should be used instead of __get_user() when accessing + * values at locations that are not known to be aligned. + */ +#define __get_user_unaligned(x, ptr) \ +({ \ + __typeof__ (*(ptr)) __x; \ + __copy_from_user(&__x, (ptr), sizeof(*(ptr))) ? -EFAULT : 0; \ + (x) = __x; \ +}) + + +/* + * This macro should be used instead of __put_user() when accessing + * values at locations that are not known to be aligned. + */ +#define __put_user_unaligned(x, ptr) \ +({ \ + __typeof__ (*(ptr)) __x = (x); \ + __copy_to_user((ptr), &__x, sizeof(*(ptr))) ? -EFAULT : 0; \ +}) + +#endif /* __ASM_GENERIC_UACCESS_UNALIGNED_H */ diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h deleted file mode 100644 index 549cb3a1640a..000000000000 --- a/include/asm-generic/uaccess.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _ASM_GENERIC_UACCESS_H_ -#define _ASM_GENERIC_UACCESS_H_ - -/* - * This macro should be used instead of __get_user() when accessing - * values at locations that are not known to be aligned. - */ -#define __get_user_unaligned(x, ptr) \ -({ \ - __typeof__ (*(ptr)) __x; \ - __copy_from_user(&__x, (ptr), sizeof(*(ptr))) ? -EFAULT : 0; \ - (x) = __x; \ -}) - - -/* - * This macro should be used instead of __put_user() when accessing - * values at locations that are not known to be aligned. - */ -#define __put_user_unaligned(x, ptr) \ -({ \ - __typeof__ (*(ptr)) __x = (x); \ - __copy_to_user((ptr), &__x, sizeof(*(ptr))) ? -EFAULT : 0; \ -}) - -#endif /* _ASM_GENERIC_UACCESS_H */ -- cgit v1.2.3 From 3aef392822e1a42e80077a332ea2efdfd8a4248a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 17 May 2009 21:00:05 +0000 Subject: asm-generic: make get_rtc_time overridable Evidently, set_rtc_time is supposed to be overridable by architectures that define their own version, but unfortunately, get_rtc_ss would in that case still use the generic version. This makes get_rtc_ss call the real set_rtc_time to let architectures define their own version. The change should fix the "Extended RTC operation" on Alpha, which uses the incorrect get_rtc_ss call. It also allows PowerPC to use the asm-generic/rtc.h file in the future. Cc: Richard Henderson Cc: linux-alpha@vger.kernel.org Cc: Tom Rini Cc: rtc-linux@googlegroups.com Cc: Alessandro Zummo Cc: Paul Gortmaker Signed-off-by: Arnd Bergmann --- include/asm-generic/rtc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h index 763e3b060f43..fa86f240c874 100644 --- a/include/asm-generic/rtc.h +++ b/include/asm-generic/rtc.h @@ -202,7 +202,7 @@ static inline unsigned int get_rtc_ss(void) { struct rtc_time h; - __get_rtc_time(&h); + get_rtc_time(&h); return h.tm_sec; } -- cgit v1.2.3 From d7c4f1b78afeedfc22b1756fcdc1acbe84284d74 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:31 +0000 Subject: asm-generic: make pci.h usable directly Some generic code is using the horribly misnamed PCI_DMA_BUS_IS_PHYS from asm/pci.h. This makes sure that an architecture without PCI support does not have to define this itself but can rely on the asm-generic version. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/pci.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h index c36a77d3bf44..515c6e2e3218 100644 --- a/include/asm-generic/pci.h +++ b/include/asm-generic/pci.h @@ -52,4 +52,12 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) } #endif /* HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ */ +/* + * By default, assume that no iommu is in use and that the PCI + * space is mapped to address physical 0. + */ +#ifndef PCI_DMA_BUS_IS_PHYS +#define PCI_DMA_BUS_IS_PHYS (1) #endif + +#endif /* _ASM_GENERIC_PCI_H */ -- cgit v1.2.3 From 9858c60cc2d33b18367b2bc6947e3ea23db26ccb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:32 +0000 Subject: asm-generic: make bitops.h usable bitops.h apparently suffered from some level of bitrot, it was missing the smp_mb__{before,after}_clear_bit functions, and included other headers in an invalid order. This changes the file so that new architectures can use it out of the box. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/bitops.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h index c9f369c4bd7e..a54f4421a24d 100644 --- a/include/asm-generic/bitops.h +++ b/include/asm-generic/bitops.h @@ -1,19 +1,29 @@ -#ifndef _ASM_GENERIC_BITOPS_H_ -#define _ASM_GENERIC_BITOPS_H_ +#ifndef __ASM_GENERIC_BITOPS_H +#define __ASM_GENERIC_BITOPS_H /* * For the benefit of those who are trying to port Linux to another * architecture, here are some C-language equivalents. You should * recode these in the native assembly language, if at all possible. - * + * * C language equivalents written by Theodore Ts'o, 9/26/92 */ -#include -#include +#include +#include + +/* + * clear_bit may not imply a memory barrier + */ +#ifndef smp_mb__before_clear_bit +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() +#endif + #include #include #include +#include #include #include @@ -26,8 +36,10 @@ #include #include +#include +#include #include #include #include -#endif /* _ASM_GENERIC_BITOPS_H */ +#endif /* __ASM_GENERIC_BITOPS_H */ -- cgit v1.2.3 From aafe4dbed0bf6cbdb2e9f03e1d42f8a540d8541d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:33 +0000 Subject: asm-generic: add generic versions of common headers These are all kernel internal interfaces that get copied around a lot. In most cases, architectures can provide their own optimized versions, but these generic versions can work as well. I have tried to use the most common contents of each header to allow existing architectures to migrate easily. Thanks to Remis for suggesting a number of cleanups. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/bugs.h | 10 +++ include/asm-generic/current.h | 9 +++ include/asm-generic/delay.h | 9 +++ include/asm-generic/fb.h | 12 +++ include/asm-generic/hardirq.h | 34 ++++++++ include/asm-generic/irq.h | 18 +++++ include/asm-generic/irqflags.h | 72 +++++++++++++++++ include/asm-generic/kmap_types.h | 32 ++++++++ include/asm-generic/linkage.h | 8 ++ include/asm-generic/module.h | 22 ++++++ include/asm-generic/mutex.h | 9 +++ include/asm-generic/scatterlist.h | 43 ++++++++++ include/asm-generic/spinlock.h | 11 +++ include/asm-generic/string.h | 10 +++ include/asm-generic/syscalls.h | 60 ++++++++++++++ include/asm-generic/system.h | 161 ++++++++++++++++++++++++++++++++++++++ include/asm-generic/unaligned.h | 30 +++++++ include/asm-generic/user.h | 8 ++ 18 files changed, 558 insertions(+) create mode 100644 include/asm-generic/bugs.h create mode 100644 include/asm-generic/current.h create mode 100644 include/asm-generic/delay.h create mode 100644 include/asm-generic/fb.h create mode 100644 include/asm-generic/hardirq.h create mode 100644 include/asm-generic/irq.h create mode 100644 include/asm-generic/irqflags.h create mode 100644 include/asm-generic/kmap_types.h create mode 100644 include/asm-generic/linkage.h create mode 100644 include/asm-generic/module.h create mode 100644 include/asm-generic/mutex.h create mode 100644 include/asm-generic/scatterlist.h create mode 100644 include/asm-generic/spinlock.h create mode 100644 include/asm-generic/string.h create mode 100644 include/asm-generic/syscalls.h create mode 100644 include/asm-generic/system.h create mode 100644 include/asm-generic/unaligned.h create mode 100644 include/asm-generic/user.h (limited to 'include') diff --git a/include/asm-generic/bugs.h b/include/asm-generic/bugs.h new file mode 100644 index 000000000000..6c4f62ea714d --- /dev/null +++ b/include/asm-generic/bugs.h @@ -0,0 +1,10 @@ +#ifndef __ASM_GENERIC_BUGS_H +#define __ASM_GENERIC_BUGS_H +/* + * This file is included by 'init/main.c' to check for + * architecture-dependent bugs. + */ + +static inline void check_bugs(void) { } + +#endif /* __ASM_GENERIC_BUGS_H */ diff --git a/include/asm-generic/current.h b/include/asm-generic/current.h new file mode 100644 index 000000000000..5e86f6ae7cab --- /dev/null +++ b/include/asm-generic/current.h @@ -0,0 +1,9 @@ +#ifndef __ASM_GENERIC_CURRENT_H +#define __ASM_GENERIC_CURRENT_H + +#include + +#define get_current() (current_thread_info()->task) +#define current get_current() + +#endif /* __ASM_GENERIC_CURRENT_H */ diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h new file mode 100644 index 000000000000..4586fec75ddb --- /dev/null +++ b/include/asm-generic/delay.h @@ -0,0 +1,9 @@ +#ifndef __ASM_GENERIC_DELAY_H +#define __ASM_GENERIC_DELAY_H + +extern void __udelay(unsigned long usecs); +extern void __delay(unsigned long loops); + +#define udelay(n) __udelay(n) + +#endif /* __ASM_GENERIC_DELAY_H */ diff --git a/include/asm-generic/fb.h b/include/asm-generic/fb.h new file mode 100644 index 000000000000..fe8ca7fcea00 --- /dev/null +++ b/include/asm-generic/fb.h @@ -0,0 +1,12 @@ +#ifndef __ASM_GENERIC_FB_H_ +#define __ASM_GENERIC_FB_H_ +#include + +#define fb_pgprotect(...) do {} while (0) + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* __ASM_GENERIC_FB_H_ */ diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h new file mode 100644 index 000000000000..3d5d2c906ab3 --- /dev/null +++ b/include/asm-generic/hardirq.h @@ -0,0 +1,34 @@ +#ifndef __ASM_GENERIC_HARDIRQ_H +#define __ASM_GENERIC_HARDIRQ_H + +#include +#include +#include + +typedef struct { + unsigned long __softirq_pending; +} ____cacheline_aligned irq_cpustat_t; + +#include /* Standard mappings for irq_cpustat_t above */ + +#ifndef HARDIRQ_BITS +#define HARDIRQ_BITS 8 +#endif + +/* + * The hardirq mask has to be large enough to have + * space for potentially all IRQ sources in the system + * nesting on a single CPU: + */ +#if (1 << HARDIRQ_BITS) < NR_IRQS +# error HARDIRQ_BITS is too low! +#endif + +#ifndef ack_bad_irq +static inline void ack_bad_irq(unsigned int irq) +{ + printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); +} +#endif + +#endif /* __ASM_GENERIC_HARDIRQ_H */ diff --git a/include/asm-generic/irq.h b/include/asm-generic/irq.h new file mode 100644 index 000000000000..b90ec0bc485f --- /dev/null +++ b/include/asm-generic/irq.h @@ -0,0 +1,18 @@ +#ifndef __ASM_GENERIC_IRQ_H +#define __ASM_GENERIC_IRQ_H + +/* + * NR_IRQS is the upper bound of how many interrupts can be handled + * in the platform. It is used to size the static irq_map array, + * so don't make it too big. + */ +#ifndef NR_IRQS +#define NR_IRQS 64 +#endif + +static inline int irq_canonicalize(int irq) +{ + return irq; +} + +#endif /* __ASM_GENERIC_IRQ_H */ diff --git a/include/asm-generic/irqflags.h b/include/asm-generic/irqflags.h new file mode 100644 index 000000000000..9aebf618275a --- /dev/null +++ b/include/asm-generic/irqflags.h @@ -0,0 +1,72 @@ +#ifndef __ASM_GENERIC_IRQFLAGS_H +#define __ASM_GENERIC_IRQFLAGS_H + +/* + * All architectures should implement at least the first two functions, + * usually inline assembly will be the best way. + */ +#ifndef RAW_IRQ_DISABLED +#define RAW_IRQ_DISABLED 0 +#define RAW_IRQ_ENABLED 1 +#endif + +/* read interrupt enabled status */ +#ifndef __raw_local_save_flags +unsigned long __raw_local_save_flags(void); +#endif + +/* set interrupt enabled status */ +#ifndef raw_local_irq_restore +void raw_local_irq_restore(unsigned long flags); +#endif + +/* get status and disable interrupts */ +#ifndef __raw_local_irq_save +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags; + flags = __raw_local_save_flags(); + raw_local_irq_restore(RAW_IRQ_DISABLED); + return flags; +} +#endif + +/* test flags */ +#ifndef raw_irqs_disabled_flags +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return flags == RAW_IRQ_DISABLED; +} +#endif + +/* unconditionally enable interrupts */ +#ifndef raw_local_irq_enable +static inline void raw_local_irq_enable(void) +{ + raw_local_irq_restore(RAW_IRQ_ENABLED); +} +#endif + +/* unconditionally disable interrupts */ +#ifndef raw_local_irq_disable +static inline void raw_local_irq_disable(void) +{ + raw_local_irq_restore(RAW_IRQ_DISABLED); +} +#endif + +/* test hardware interrupt enable bit */ +#ifndef raw_irqs_disabled +static inline int raw_irqs_disabled(void) +{ + return raw_irqs_disabled_flags(__raw_local_save_flags()); +} +#endif + +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +#endif /* __ASM_GENERIC_IRQFLAGS_H */ diff --git a/include/asm-generic/kmap_types.h b/include/asm-generic/kmap_types.h new file mode 100644 index 000000000000..58c33055c304 --- /dev/null +++ b/include/asm-generic/kmap_types.h @@ -0,0 +1,32 @@ +#ifndef _ASM_GENERIC_KMAP_TYPES_H +#define _ASM_GENERIC_KMAP_TYPES_H + +#ifdef CONFIG_DEBUG_HIGHMEM +# define D(n) __KM_FENCE_##n , +#else +# define D(n) +#endif + +enum km_type { +D(0) KM_BOUNCE_READ, +D(1) KM_SKB_SUNRPC_DATA, +D(2) KM_SKB_DATA_SOFTIRQ, +D(3) KM_USER0, +D(4) KM_USER1, +D(5) KM_BIO_SRC_IRQ, +D(6) KM_BIO_DST_IRQ, +D(7) KM_PTE0, +D(8) KM_PTE1, +D(9) KM_IRQ0, +D(10) KM_IRQ1, +D(11) KM_SOFTIRQ0, +D(12) KM_SOFTIRQ1, +D(13) KM_SYNC_ICACHE, +D(14) KM_SYNC_DCACHE, +D(15) KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */ +D(16) KM_TYPE_NR +}; + +#undef D + +#endif diff --git a/include/asm-generic/linkage.h b/include/asm-generic/linkage.h new file mode 100644 index 000000000000..fef7a01e5415 --- /dev/null +++ b/include/asm-generic/linkage.h @@ -0,0 +1,8 @@ +#ifndef __ASM_GENERIC_LINKAGE_H +#define __ASM_GENERIC_LINKAGE_H +/* + * linux/linkage.h provides reasonable defaults. + * an architecture can override them by providing its own version. + */ + +#endif /* __ASM_GENERIC_LINKAGE_H */ diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h new file mode 100644 index 000000000000..ed5b44de4c91 --- /dev/null +++ b/include/asm-generic/module.h @@ -0,0 +1,22 @@ +#ifndef __ASM_GENERIC_MODULE_H +#define __ASM_GENERIC_MODULE_H + +/* + * Many architectures just need a simple module + * loader without arch specific data. + */ +struct mod_arch_specific +{ +}; + +#ifdef CONFIG_64BIT +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define Elf_Ehdr Elf64_Ehdr +#else +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Ehdr Elf32_Ehdr +#endif + +#endif /* __ASM_GENERIC_MODULE_H */ diff --git a/include/asm-generic/mutex.h b/include/asm-generic/mutex.h new file mode 100644 index 000000000000..fe91ab502793 --- /dev/null +++ b/include/asm-generic/mutex.h @@ -0,0 +1,9 @@ +#ifndef __ASM_GENERIC_MUTEX_H +#define __ASM_GENERIC_MUTEX_H +/* + * Pull in the generic implementation for the mutex fastpath, + * which is a reasonable default on many architectures. + */ + +#include +#endif /* __ASM_GENERIC_MUTEX_H */ diff --git a/include/asm-generic/scatterlist.h b/include/asm-generic/scatterlist.h new file mode 100644 index 000000000000..8b9454496a7c --- /dev/null +++ b/include/asm-generic/scatterlist.h @@ -0,0 +1,43 @@ +#ifndef __ASM_GENERIC_SCATTERLIST_H +#define __ASM_GENERIC_SCATTERLIST_H + +#include + +struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; + unsigned int dma_length; +}; + +/* + * These macros should be used after a dma_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns, or alternatively stop on the first sg_dma_len(sg) which + * is 0. + */ +#define sg_dma_address(sg) ((sg)->dma_address) +#ifndef sg_dma_len +/* + * Normally, you have an iommu on 64 bit machines, but not on 32 bit + * machines. Architectures that are differnt should override this. + */ +#if __BITS_PER_LONG == 64 +#define sg_dma_len(sg) ((sg)->dma_length) +#else +#define sg_dma_len(sg) ((sg)->length) +#endif /* 64 bit */ +#endif /* sg_dma_len */ + +#ifndef ISA_DMA_THRESHOLD +#define ISA_DMA_THRESHOLD (~0UL) +#endif + +#define ARCH_HAS_SG_CHAIN + +#endif /* __ASM_GENERIC_SCATTERLIST_H */ diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h new file mode 100644 index 000000000000..1547a03ac50f --- /dev/null +++ b/include/asm-generic/spinlock.h @@ -0,0 +1,11 @@ +#ifndef __ASM_GENERIC_SPINLOCK_H +#define __ASM_GENERIC_SPINLOCK_H +/* + * You need to implement asm/spinlock.h for SMP support. The generic + * version does not handle SMP. + */ +#ifdef CONFIG_SMP +#error need an architecture specific asm/spinlock.h +#endif + +#endif /* __ASM_GENERIC_SPINLOCK_H */ diff --git a/include/asm-generic/string.h b/include/asm-generic/string.h new file mode 100644 index 000000000000..de5e0201459f --- /dev/null +++ b/include/asm-generic/string.h @@ -0,0 +1,10 @@ +#ifndef __ASM_GENERIC_STRING_H +#define __ASM_GENERIC_STRING_H +/* + * The kernel provides all required functions in lib/string.c + * + * Architectures probably want to provide at least their own optimized + * memcpy and memset functions though. + */ + +#endif /* __ASM_GENERIC_STRING_H */ diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h new file mode 100644 index 000000000000..df84e3b04555 --- /dev/null +++ b/include/asm-generic/syscalls.h @@ -0,0 +1,60 @@ +#ifndef __ASM_GENERIC_SYSCALLS_H +#define __ASM_GENERIC_SYSCALLS_H + +#include +#include + +/* + * Calling conventions for these system calls can differ, so + * it's possible to override them. + */ +#ifndef sys_clone +asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid, + struct pt_regs *regs); +#endif + +#ifndef sys_fork +asmlinkage long sys_fork(struct pt_regs *regs); +#endif + +#ifndef sys_vfork +asmlinkage long sys_vfork(struct pt_regs *regs); +#endif + +#ifndef sys_execve +asmlinkage long sys_execve(char __user *filename, char __user * __user *argv, + char __user * __user *envp, struct pt_regs *regs); +#endif + +#ifndef sys_mmap2 +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); +#endif + +#ifndef sys_mmap +asmlinkage long sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t pgoff); +#endif + +#ifndef sys_sigaltstack +asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, + struct pt_regs *); +#endif + +#ifndef sys_rt_sigreturn +asmlinkage long sys_rt_sigreturn(struct pt_regs *regs); +#endif + +#ifndef sys_rt_sigsuspend +asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); +#endif + +#ifndef sys_rt_sigaction +asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, + struct sigaction __user *oact, size_t sigsetsize); +#endif + +#endif /* __ASM_GENERIC_SYSCALLS_H */ diff --git a/include/asm-generic/system.h b/include/asm-generic/system.h new file mode 100644 index 000000000000..efa403b5e121 --- /dev/null +++ b/include/asm-generic/system.h @@ -0,0 +1,161 @@ +/* Generic system definitions, based on MN10300 definitions. + * + * It should be possible to use these on really simple architectures, + * but it serves more as a starting point for new ports. + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#ifndef __ASM_GENERIC_SYSTEM_H +#define __ASM_GENERIC_SYSTEM_H + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#include +#include + +#include + +struct task_struct; + +/* context switching is now performed out-of-line in switch_to.S */ +extern struct task_struct *__switch_to(struct task_struct *, + struct task_struct *); +#define switch_to(prev, next, last) \ + do { \ + ((last) = __switch_to((prev), (next))); \ + } while (0) + +#define arch_align_stack(x) (x) + +#define nop() asm volatile ("nop") + +#endif /* !__ASSEMBLY__ */ + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + * + * This implementation only contains a compiler barrier. + */ + +#define mb() asm volatile ("": : :"memory") +#define rmb() mb() +#define wmb() asm volatile ("": : :"memory") + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#endif + +#define set_mb(var, value) do { var = value; mb(); } while (0) +#define set_wmb(var, value) do { var = value; wmb(); } while (0) + +#define read_barrier_depends() do {} while (0) +#define smp_read_barrier_depends() do {} while (0) + +/* + * we make sure local_irq_enable() doesn't cause priority inversion + */ +#ifndef __ASSEMBLY__ + +/* This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid xchg(). */ +extern void __xchg_called_with_bad_pointer(void); + +static inline +unsigned long __xchg(unsigned long x, volatile void *ptr, int size) +{ + unsigned long ret, flags; + + switch (size) { + case 1: +#ifdef __xchg_u8 + return __xchg_u8(x, ptr); +#else + local_irq_save(flags); + ret = *(volatile u8 *)ptr; + *(volatile u8 *)ptr = x; + local_irq_restore(flags); + return ret; +#endif /* __xchg_u8 */ + + case 2: +#ifdef __xchg_u16 + return __xchg_u16(x, ptr); +#else + local_irq_save(flags); + ret = *(volatile u16 *)ptr; + *(volatile u16 *)ptr = x; + local_irq_restore(flags); + return ret; +#endif /* __xchg_u16 */ + + case 4: +#ifdef __xchg_u32 + return __xchg_u32(x, ptr); +#else + local_irq_save(flags); + ret = *(volatile u32 *)ptr; + *(volatile u32 *)ptr = x; + local_irq_restore(flags); + return ret; +#endif /* __xchg_u32 */ + +#ifdef CONFIG_64BIT + case 8: +#ifdef __xchg_u64 + return __xchg_u64(x, ptr); +#else + local_irq_save(flags); + ret = *(volatile u64 *)ptr; + *(volatile u64 *)ptr = x; + local_irq_restore(flags); + return ret; +#endif /* __xchg_u64 */ +#endif /* CONFIG_64BIT */ + + default: + __xchg_called_with_bad_pointer(); + return x; + } +} + +#define xchg(ptr, x) \ + ((__typeof__(*(ptr))) __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) + +static inline unsigned long __cmpxchg(volatile unsigned long *m, + unsigned long old, unsigned long new) +{ + unsigned long retval; + unsigned long flags; + + local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + local_irq_restore(flags); + return retval; +} + +#define cmpxchg(ptr, o, n) \ + ((__typeof__(*(ptr))) __cmpxchg((unsigned long *)(ptr), \ + (unsigned long)(o), \ + (unsigned long)(n))) + +#endif /* !__ASSEMBLY__ */ + +#endif /* __KERNEL__ */ +#endif /* __ASM_GENERIC_SYSTEM_H */ diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h new file mode 100644 index 000000000000..03cf5936bad6 --- /dev/null +++ b/include/asm-generic/unaligned.h @@ -0,0 +1,30 @@ +#ifndef __ASM_GENERIC_UNALIGNED_H +#define __ASM_GENERIC_UNALIGNED_H + +/* + * This is the most generic implementation of unaligned accesses + * and should work almost anywhere. + * + * If an architecture can handle unaligned accesses in hardware, + * it may want to use the linux/unaligned/access_ok.h implementation + * instead. + */ +#include + +#if defined(__LITTLE_ENDIAN) +# include +# include +# include +# define get_unaligned __get_unaligned_le +# define put_unaligned __put_unaligned_le +#elif defined(__BIG_ENDIAN) +# include +# include +# include +# define get_unaligned __get_unaligned_be +# define put_unaligned __put_unaligned_be +#else +# error need to define endianess +#endif + +#endif /* __ASM_GENERIC_UNALIGNED_H */ diff --git a/include/asm-generic/user.h b/include/asm-generic/user.h new file mode 100644 index 000000000000..8b9c3c960aeb --- /dev/null +++ b/include/asm-generic/user.h @@ -0,0 +1,8 @@ +#ifndef __ASM_GENERIC_USER_H +#define __ASM_GENERIC_USER_H +/* + * This file may define a 'struct user' structure. However, it it only + * used for a.out file, which are not supported on new architectures. + */ + +#endif /* __ASM_GENERIC_USER_H */ -- cgit v1.2.3 From ae49e807951d5e26767bc55d1dc29671c596c450 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:34 +0000 Subject: asm-generic: add legacy I/O header files The dma.h, hw_irq.h, serial.h and timex.h files originally described PC-style i8237, i8259A, i8250, i8253 and i8255 chips as well as the VGA style text mode graphics. Modern architectures live happily without these specific interfaces, but a few definitions from these headers keep getting used in common code. The new generic headers are what most architectures use anyway nowadays, just implementing the minimal definitions. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/dma.h | 15 +++++++++++++++ include/asm-generic/hw_irq.h | 9 +++++++++ include/asm-generic/parport.h | 23 +++++++++++++++++++++++ include/asm-generic/serial.h | 13 +++++++++++++ include/asm-generic/timex.h | 22 ++++++++++++++++++++++ include/asm-generic/vga.h | 24 ++++++++++++++++++++++++ 6 files changed, 106 insertions(+) create mode 100644 include/asm-generic/dma.h create mode 100644 include/asm-generic/hw_irq.h create mode 100644 include/asm-generic/parport.h create mode 100644 include/asm-generic/serial.h create mode 100644 include/asm-generic/timex.h create mode 100644 include/asm-generic/vga.h (limited to 'include') diff --git a/include/asm-generic/dma.h b/include/asm-generic/dma.h new file mode 100644 index 000000000000..9dfc3a7f36d2 --- /dev/null +++ b/include/asm-generic/dma.h @@ -0,0 +1,15 @@ +#ifndef __ASM_GENERIC_DMA_H +#define __ASM_GENERIC_DMA_H +/* + * This file traditionally describes the i8237 PC style DMA controller. + * Most architectures don't have these any more and can get the minimal + * implementation from kernel/dma.c by not defining MAX_DMA_CHANNELS. + * + * Some code relies on seeing MAX_DMA_ADDRESS though. + */ +#define MAX_DMA_ADDRESS PAGE_OFFSET + +extern int request_dma(unsigned int dmanr, const char *device_id); +extern void free_dma(unsigned int dmanr); + +#endif /* __ASM_GENERIC_DMA_H */ diff --git a/include/asm-generic/hw_irq.h b/include/asm-generic/hw_irq.h new file mode 100644 index 000000000000..89036d7b40e0 --- /dev/null +++ b/include/asm-generic/hw_irq.h @@ -0,0 +1,9 @@ +#ifndef __ASM_GENERIC_HW_IRQ_H +#define __ASM_GENERIC_HW_IRQ_H +/* + * hw_irq.h has internal declarations for the low-level interrupt + * controller, like the original i8259A. + * In general, this is not needed for new architectures. + */ + +#endif /* __ASM_GENERIC_HW_IRQ_H */ diff --git a/include/asm-generic/parport.h b/include/asm-generic/parport.h new file mode 100644 index 000000000000..40528cb977e8 --- /dev/null +++ b/include/asm-generic/parport.h @@ -0,0 +1,23 @@ +#ifndef __ASM_GENERIC_PARPORT_H +#define __ASM_GENERIC_PARPORT_H + +/* + * An ISA bus may have i8255 parallel ports at well-known + * locations in the I/O space, which are scanned by + * parport_pc_find_isa_ports. + * + * Without ISA support, the driver will only attach + * to devices on the PCI bus. + */ + +static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma); +static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma) +{ +#ifdef CONFIG_ISA + return parport_pc_find_isa_ports(autoirq, autodma); +#else + return 0; +#endif +} + +#endif /* __ASM_GENERIC_PARPORT_H */ diff --git a/include/asm-generic/serial.h b/include/asm-generic/serial.h new file mode 100644 index 000000000000..5e291090fe04 --- /dev/null +++ b/include/asm-generic/serial.h @@ -0,0 +1,13 @@ +#ifndef __ASM_GENERIC_SERIAL_H +#define __ASM_GENERIC_SERIAL_H + +/* + * This should not be an architecture specific #define, oh well. + * + * Traditionally, it just describes i8250 and related serial ports + * that have this clock rate. + */ + +#define BASE_BAUD (1843200 / 16) + +#endif /* __ASM_GENERIC_SERIAL_H */ diff --git a/include/asm-generic/timex.h b/include/asm-generic/timex.h new file mode 100644 index 000000000000..b2243cb8d6f6 --- /dev/null +++ b/include/asm-generic/timex.h @@ -0,0 +1,22 @@ +#ifndef __ASM_GENERIC_TIMEX_H +#define __ASM_GENERIC_TIMEX_H + +/* + * If you have a cycle counter, return the value here. + */ +typedef unsigned long cycles_t; +#ifndef get_cycles +static inline cycles_t get_cycles(void) +{ + return 0; +} +#endif + +/* + * Architectures are encouraged to implement read_current_timer + * and define this in order to avoid the expensive delay loop + * calibration during boot. + */ +#undef ARCH_HAS_READ_CURRENT_TIMER + +#endif /* __ASM_GENERIC_TIMEX_H */ diff --git a/include/asm-generic/vga.h b/include/asm-generic/vga.h new file mode 100644 index 000000000000..36c8ff52016b --- /dev/null +++ b/include/asm-generic/vga.h @@ -0,0 +1,24 @@ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares + */ +#ifndef __ASM_GENERIC_VGA_H +#define __ASM_GENERIC_VGA_H + +/* + * On most architectures that support VGA, we can just + * recalculate addresses and then access the videoram + * directly without any black magic. + * + * Everyone else needs to ioremap the address and use + * proper I/O accesses. + */ +#ifndef VGA_MAP_MEM +#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x) +#endif + +#define vga_readb(x) (*(x)) +#define vga_writeb(x, y) (*(y) = (x)) + +#endif /* _ASM_GENERIC_VGA_H */ -- cgit v1.2.3 From 3f7e212df82ca0459d44c91d9e019efd1b5f936c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:35 +0000 Subject: asm-generic: add generic atomic.h and io.h atomic.h and io.h are based on the mn10300 architecture, which is already pretty generic and can be used by other architectures that do not have hardware support for atomic operations or out-of-order I/O access. Signed-off-by: Arnd Bergmann --- include/asm-generic/atomic.h | 165 ++++++++++++++++++++++++ include/asm-generic/io.h | 300 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 465 insertions(+) create mode 100644 include/asm-generic/atomic.h create mode 100644 include/asm-generic/io.h (limited to 'include') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h new file mode 100644 index 000000000000..c99c64dc5f3d --- /dev/null +++ b/include/asm-generic/atomic.h @@ -0,0 +1,165 @@ +/* + * Generic C implementation of atomic counter operations + * Originally implemented for MN10300. + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#ifndef __ASM_GENERIC_ATOMIC_H +#define __ASM_GENERIC_ATOMIC_H + +#ifdef CONFIG_SMP +#error not SMP safe +#endif + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +#define ATOMIC_INIT(i) { (i) } + +#ifdef __KERNEL__ + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_read(v) ((v)->counter) + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_set(v, i) (((v)->counter) = (i)) + +#include + +/** + * atomic_add_return - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns the result + * Note that the guaranteed useful range of an atomic_t is only 24 bits. + */ +static inline int atomic_add_return(int i, atomic_t *v) +{ + unsigned long flags; + int temp; + + local_irq_save(flags); + temp = v->counter; + temp += i; + v->counter = temp; + local_irq_restore(flags); + + return temp; +} + +/** + * atomic_sub_return - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns the result + * Note that the guaranteed useful range of an atomic_t is only 24 bits. + */ +static inline int atomic_sub_return(int i, atomic_t *v) +{ + unsigned long flags; + int temp; + + local_irq_save(flags); + temp = v->counter; + temp -= i; + v->counter = temp; + local_irq_restore(flags); + + return temp; +} + +static inline int atomic_add_negative(int i, atomic_t *v) +{ + return atomic_add_return(i, v) < 0; +} + +static inline void atomic_add(int i, atomic_t *v) +{ + atomic_add_return(i, v); +} + +static inline void atomic_sub(int i, atomic_t *v) +{ + atomic_sub_return(i, v); +} + +static inline void atomic_inc(atomic_t *v) +{ + atomic_add_return(1, v); +} + +static inline void atomic_dec(atomic_t *v) +{ + atomic_sub_return(1, v); +} + +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) +#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) +#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) + +#define atomic_add_unless(v, a, u) \ +({ \ + int c, old; \ + c = atomic_read(v); \ + while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ + c = old; \ + c != (u); \ +}) + +#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) + +static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) +{ + unsigned long flags; + + mask = ~mask; + local_irq_save(flags); + *addr &= mask; + local_irq_restore(flags); +} + +#define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) + +#define cmpxchg_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ + (unsigned long)(n), sizeof(*(ptr)))) + +#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) + +/* Assume that atomic operations are already serializing */ +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#include + +#endif /* __KERNEL__ */ +#endif /* __ASM_GENERIC_ATOMIC_H */ diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h new file mode 100644 index 000000000000..bcee6365dca0 --- /dev/null +++ b/include/asm-generic/io.h @@ -0,0 +1,300 @@ +/* Generic I/O port emulation, based on MN10300 code + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#ifndef __ASM_GENERIC_IO_H +#define __ASM_GENERIC_IO_H + +#include /* I/O is all done through memory accesses */ +#include +#include + +#ifdef CONFIG_GENERIC_IOMAP +#include +#endif + +#define mmiowb() do {} while (0) + +/*****************************************************************************/ +/* + * readX/writeX() are used to access memory mapped devices. On some + * architectures the memory mapped IO stuff needs to be accessed + * differently. On the simple architectures, we just read/write the + * memory location directly. + */ +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return *(const volatile u8 __force *) addr; +} + +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + return *(const volatile u16 __force *) addr; +} + +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + return *(const volatile u32 __force *) addr; +} + +#define readb __raw_readb +#define readw(addr) __le16_to_cpu(__raw_readw(addr)) +#define readl(addr) __le32_to_cpu(__raw_readl(addr)) + +static inline void __raw_writeb(u8 b, volatile void __iomem *addr) +{ + *(volatile u8 __force *) addr = b; +} + +static inline void __raw_writew(u16 b, volatile void __iomem *addr) +{ + *(volatile u16 __force *) addr = b; +} + +static inline void __raw_writel(u32 b, volatile void __iomem *addr) +{ + *(volatile u32 __force *) addr = b; +} + +#define writeb __raw_writeb +#define writew(b,addr) __raw_writew(__cpu_to_le16(b),addr) +#define writel(b,addr) __raw_writel(__cpu_to_le32(b),addr) + +#ifdef CONFIG_64BIT +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + return *(const volatile u64 __force *) addr; +} +#define readq(addr) __le64_to_cpu(__raw_readq(addr)) + +static inline void __raw_writeq(u64 b, volatile void __iomem *addr) +{ + *(volatile u64 __force *) addr = b; +} +#define writeq(b,addr) __raw_writeq(__cpu_to_le64(b),addr) +#endif + +/*****************************************************************************/ +/* + * traditional input/output functions + */ + +static inline u8 inb(unsigned long addr) +{ + return readb((volatile void __iomem *) addr); +} + +static inline u16 inw(unsigned long addr) +{ + return readw((volatile void __iomem *) addr); +} + +static inline u32 inl(unsigned long addr) +{ + return readl((volatile void __iomem *) addr); +} + +static inline void outb(u8 b, unsigned long addr) +{ + writeb(b, (volatile void __iomem *) addr); +} + +static inline void outw(u16 b, unsigned long addr) +{ + writew(b, (volatile void __iomem *) addr); +} + +static inline void outl(u32 b, unsigned long addr) +{ + writel(b, (volatile void __iomem *) addr); +} + +#define inb_p(addr) inb(addr) +#define inw_p(addr) inw(addr) +#define inl_p(addr) inl(addr) +#define outb_p(x, addr) outb((x), (addr)) +#define outw_p(x, addr) outw((x), (addr)) +#define outl_p(x, addr) outl((x), (addr)) + +static inline void insb(unsigned long addr, void *buffer, int count) +{ + if (count) { + u8 *buf = buffer; + do { + u8 x = inb(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void insw(unsigned long addr, void *buffer, int count) +{ + if (count) { + u16 *buf = buffer; + do { + u16 x = inw(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void insl(unsigned long addr, void *buffer, int count) +{ + if (count) { + u32 *buf = buffer; + do { + u32 x = inl(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void outsb(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u8 *buf = buffer; + do { + outb(*buf++, addr); + } while (--count); + } +} + +static inline void outsw(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u16 *buf = buffer; + do { + outw(*buf++, addr); + } while (--count); + } +} + +static inline void outsl(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u32 *buf = buffer; + do { + outl(*buf++, addr); + } while (--count); + } +} + +#ifndef CONFIG_GENERIC_IOMAP +#define ioread8(addr) readb(addr) +#define ioread16(addr) readw(addr) +#define ioread32(addr) readl(addr) + +#define iowrite8(v, addr) writeb((v), (addr)) +#define iowrite16(v, addr) writew((v), (addr)) +#define iowrite32(v, addr) writel((v), (addr)) + +#define ioread8_rep(p, dst, count) \ + insb((unsigned long) (p), (dst), (count)) +#define ioread16_rep(p, dst, count) \ + insw((unsigned long) (p), (dst), (count)) +#define ioread32_rep(p, dst, count) \ + insl((unsigned long) (p), (dst), (count)) + +#define iowrite8_rep(p, src, count) \ + outsb((unsigned long) (p), (src), (count)) +#define iowrite16_rep(p, src, count) \ + outsw((unsigned long) (p), (src), (count)) +#define iowrite32_rep(p, src, count) \ + outsl((unsigned long) (p), (src), (count)) +#endif /* CONFIG_GENERIC_IOMAP */ + + +#define IO_SPACE_LIMIT 0xffffffff + +#ifdef __KERNEL__ + +#include +#define __io_virt(x) ((void __force *) (x)) + +#ifndef CONFIG_GENERIC_IOMAP +/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ +struct pci_dev; +extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) +{ +} +#endif /* CONFIG_GENERIC_IOMAP */ + +/* + * Change virtual addresses to physical addresses and vv. + * These are pretty trivial + */ +static inline unsigned long virt_to_phys(volatile void *address) +{ + return __pa((unsigned long)address); +} + +static inline void *phys_to_virt(unsigned long address) +{ + return __va(address); +} + +/* + * Change "struct page" to physical address. + */ +static inline void __iomem *ioremap(phys_addr_t offset, unsigned long size) +{ + return (void __iomem*) (unsigned long)offset; +} + +#define __ioremap(offset, size, flags) ioremap(offset, size) + +#ifndef ioremap_nocache +#define ioremap_nocache ioremap +#endif + +#ifndef ioremap_wc +#define ioremap_wc ioremap_nocache +#endif + +static inline void iounmap(void *addr) +{ +} + +#ifndef CONFIG_GENERIC_IOMAP +static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return (void __iomem *) port; +} + +static inline void ioport_unmap(void __iomem *p) +{ +} +#else /* CONFIG_GENERIC_IOMAP */ +extern void __iomem *ioport_map(unsigned long port, unsigned int nr); +extern void ioport_unmap(void __iomem *p); +#endif /* CONFIG_GENERIC_IOMAP */ + +#define xlate_dev_kmem_ptr(p) p +#define xlate_dev_mem_ptr(p) ((void *) (p)) + +#ifndef virt_to_bus +static inline unsigned long virt_to_bus(volatile void *address) +{ + return ((unsigned long) address); +} + +static inline void *bus_to_virt(unsigned long address) +{ + return (void *) address; +} +#endif + +#define memset_io(a, b, c) memset(__io_virt(a), (b), (c)) +#define memcpy_fromio(a, b, c) memcpy((a), __io_virt(b), (c)) +#define memcpy_toio(a, b, c) memcpy(__io_virt(a), (b), (c)) + +#endif /* __KERNEL__ */ + +#endif /* __ASM_GENERIC_IO_H */ -- cgit v1.2.3 From 5c01b46bb6bb8f2662573c05c87b5d68fa25af89 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:36 +0000 Subject: asm-generic: add generic NOMMU versions of some headers Memory management in generic is highly architecture specific, but on NOMMU architectures, it is mostly trivial, so just add a default implementation in asm-generic that applies to all NOMMU architectures. The two files cache.h and cacheflush.h can possibly also be used by architectures that have an MMU but never require flushing the cache or have cache lines larger than 32 bytes. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/cache.h | 12 +++++ include/asm-generic/cacheflush.h | 30 ++++++++++++ include/asm-generic/mmu.h | 15 ++++++ include/asm-generic/mmu_context.h | 45 ++++++++++++++++++ include/asm-generic/page.h | 99 +++++++++++++++++++++++++++++++++++++++ include/asm-generic/pgalloc.h | 12 +++++ include/asm-generic/segment.h | 9 ++++ include/asm-generic/tlbflush.h | 18 +++++++ 8 files changed, 240 insertions(+) create mode 100644 include/asm-generic/cache.h create mode 100644 include/asm-generic/cacheflush.h create mode 100644 include/asm-generic/mmu.h create mode 100644 include/asm-generic/mmu_context.h create mode 100644 include/asm-generic/page.h create mode 100644 include/asm-generic/pgalloc.h create mode 100644 include/asm-generic/segment.h create mode 100644 include/asm-generic/tlbflush.h (limited to 'include') diff --git a/include/asm-generic/cache.h b/include/asm-generic/cache.h new file mode 100644 index 000000000000..1bfcfe5c2237 --- /dev/null +++ b/include/asm-generic/cache.h @@ -0,0 +1,12 @@ +#ifndef __ASM_GENERIC_CACHE_H +#define __ASM_GENERIC_CACHE_H +/* + * 32 bytes appears to be the most common cache line size, + * so make that the default here. Architectures with larger + * cache lines need to provide their own cache.h. + */ + +#define L1_CACHE_SHIFT 5 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#endif /* __ASM_GENERIC_CACHE_H */ diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h new file mode 100644 index 000000000000..ba4ec39a1131 --- /dev/null +++ b/include/asm-generic/cacheflush.h @@ -0,0 +1,30 @@ +#ifndef __ASM_CACHEFLUSH_H +#define __ASM_CACHEFLUSH_H + +/* Keep includes the same across arches. */ +#include + +/* + * The cache doesn't need to be flushed when TLB entries change when + * the cache is mapped to physical memory, not virtual memory + */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma,pg) do { } while (0) +#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) + +#endif /* __ASM_CACHEFLUSH_H */ diff --git a/include/asm-generic/mmu.h b/include/asm-generic/mmu.h new file mode 100644 index 000000000000..4f4aa56d6b52 --- /dev/null +++ b/include/asm-generic/mmu.h @@ -0,0 +1,15 @@ +#ifndef __ASM_GENERIC_MMU_H +#define __ASM_GENERIC_MMU_H + +/* + * This is the mmu.h header for nommu implementations. + * Architectures with an MMU need something more complex. + */ +#ifndef __ASSEMBLY__ +typedef struct { + struct vm_list_struct *vmlist; + unsigned long end_brk; +} mm_context_t; +#endif + +#endif /* __ASM_GENERIC_MMU_H */ diff --git a/include/asm-generic/mmu_context.h b/include/asm-generic/mmu_context.h new file mode 100644 index 000000000000..a7eec910ba6c --- /dev/null +++ b/include/asm-generic/mmu_context.h @@ -0,0 +1,45 @@ +#ifndef __ASM_GENERIC_MMU_CONTEXT_H +#define __ASM_GENERIC_MMU_CONTEXT_H + +/* + * Generic hooks for NOMMU architectures, which do not need to do + * anything special here. + */ + +#include + +struct task_struct; +struct mm_struct; + +static inline void enter_lazy_tlb(struct mm_struct *mm, + struct task_struct *tsk) +{ +} + +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + return 0; +} + +static inline void destroy_context(struct mm_struct *mm) +{ +} + +static inline void deactivate_mm(struct task_struct *task, + struct mm_struct *mm) +{ +} + +static inline void switch_mm(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) +{ +} + +static inline void activate_mm(struct mm_struct *prev_mm, + struct mm_struct *next_mm) +{ +} + +#endif /* __ASM_GENERIC_MMU_CONTEXT_H */ diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h new file mode 100644 index 000000000000..75fec18cdc59 --- /dev/null +++ b/include/asm-generic/page.h @@ -0,0 +1,99 @@ +#ifndef __ASM_GENERIC_PAGE_H +#define __ASM_GENERIC_PAGE_H +/* + * Generic page.h implementation, for NOMMU architectures. + * This provides the dummy definitions for the memory management. + */ + +#ifdef CONFIG_MMU +#error need to prove a real asm/page.h +#endif + + +/* PAGE_SHIFT determines the page size */ + +#define PAGE_SHIFT 12 +#ifdef __ASSEMBLY__ +#define PAGE_SIZE (1 << PAGE_SHIFT) +#else +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#endif +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#include + +#ifndef __ASSEMBLY__ + +#define get_user_page(vaddr) __get_free_page(GFP_KERNEL) +#define free_user_page(page, addr) free_page(addr) + +#define clear_page(page) memset((page), 0, PAGE_SIZE) +#define copy_page(to,from) memcpy((to), (from), PAGE_SIZE) + +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +/* + * These are used to make use of C type-checking.. + */ +typedef struct { + unsigned long pte; +} pte_t; +typedef struct { + unsigned long pmd[16]; +} pmd_t; +typedef struct { + unsigned long pgd; +} pgd_t; +typedef struct { + unsigned long pgprot; +} pgprot_t; +typedef struct page *pgtable_t; + +#define pte_val(x) ((x).pte) +#define pmd_val(x) ((&x)->pmd[0]) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +extern unsigned long memory_start; +extern unsigned long memory_end; + +#endif /* !__ASSEMBLY__ */ + +#ifdef CONFIG_KERNEL_RAM_BASE_ADDRESS +#define PAGE_OFFSET (CONFIG_KERNEL_RAM_BASE_ADDRESS) +#else +#define PAGE_OFFSET (0) +#endif + +#ifndef __ASSEMBLY__ + +#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET)) +#define __pa(x) ((unsigned long) (x) - PAGE_OFFSET) + +#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) + +#define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT)) +#define page_to_virt(page) ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET) + +#ifndef page_to_phys +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) +#endif + +#define pfn_valid(pfn) ((pfn) < max_mapnr) + +#define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \ + ((void *)(kaddr) < (void *)memory_end)) + +#endif /* __ASSEMBLY__ */ + +#include +#include + +#endif /* __ASM_GENERIC_PAGE_H */ diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h new file mode 100644 index 000000000000..9e429d08b1f8 --- /dev/null +++ b/include/asm-generic/pgalloc.h @@ -0,0 +1,12 @@ +#ifndef __ASM_GENERIC_PGALLOC_H +#define __ASM_GENERIC_PGALLOC_H +/* + * an empty file is enough for a nommu architecture + */ +#ifdef CONFIG_MMU +#error need to implement an architecture specific asm/pgalloc.h +#endif + +#define check_pgt_cache() do { } while (0) + +#endif /* __ASM_GENERIC_PGALLOC_H */ diff --git a/include/asm-generic/segment.h b/include/asm-generic/segment.h new file mode 100644 index 000000000000..5580eace622c --- /dev/null +++ b/include/asm-generic/segment.h @@ -0,0 +1,9 @@ +#ifndef __ASM_GENERIC_SEGMENT_H +#define __ASM_GENERIC_SEGMENT_H +/* + * Only here because we have some old header files that expect it... + * + * New architectures probably don't want to have their own version. + */ + +#endif /* __ASM_GENERIC_SEGMENT_H */ diff --git a/include/asm-generic/tlbflush.h b/include/asm-generic/tlbflush.h new file mode 100644 index 000000000000..c7af037024c7 --- /dev/null +++ b/include/asm-generic/tlbflush.h @@ -0,0 +1,18 @@ +#ifndef __ASM_GENERIC_TLBFLUSH_H +#define __ASM_GENERIC_TLBFLUSH_H +/* + * This is a dummy tlbflush implementation that can be used on all + * nommu architectures. + * If you have an MMU, you need to write your own functions. + */ +#ifdef CONFIG_MMU +#error need to implement an architecture specific asm/tlbflush.h +#endif + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + BUG(); +} + + +#endif /* __ASM_GENERIC_TLBFLUSH_H */ -- cgit v1.2.3 From eed417ddd52146f446595b5a7d8f21b1814b95b7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:37 +0000 Subject: asm-generic: add a generic uaccess.h Based on discussions with Michal Simek and code from m68knommu and h8300, this version of uaccess.h should be usable by most architectures, by overriding some parts of it. Simple NOMMU architectures can use it out of the box, but a minimal __access_ok() should be added there as well. Cc: Michal Simek Signed-off-by: Arnd Bergmann --- include/asm-generic/uaccess.h | 325 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 325 insertions(+) create mode 100644 include/asm-generic/uaccess.h (limited to 'include') diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h new file mode 100644 index 000000000000..6d8cab22e294 --- /dev/null +++ b/include/asm-generic/uaccess.h @@ -0,0 +1,325 @@ +#ifndef __ASM_GENERIC_UACCESS_H +#define __ASM_GENERIC_UACCESS_H + +/* + * User space memory access functions, these should work + * on a ny machine that has kernel and user data in the same + * address space, e.g. all NOMMU machines. + */ +#include +#include +#include + +#include + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + +#ifndef KERNEL_DS +#define KERNEL_DS MAKE_MM_SEG(~0UL) +#endif + +#ifndef USER_DS +#define USER_DS MAKE_MM_SEG(TASK_SIZE - 1) +#endif + +#ifndef get_fs +#define get_ds() (KERNEL_DS) +#define get_fs() (current_thread_info()->addr_limit) + +static inline void set_fs(mm_segment_t fs) +{ + current_thread_info()->addr_limit = fs; +} +#endif + +#define segment_eq(a, b) ((a).seg == (b).seg) + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +#define access_ok(type, addr, size) __access_ok((unsigned long)(addr),(size)) + +/* + * The architecture should really override this if possible, at least + * doing a check on the get_fs() + */ +#ifndef __access_ok +static inline int __access_ok(unsigned long addr, unsigned long size) +{ + return 1; +} +#endif + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + +/* Returns 0 if exception not found and fixup otherwise. */ +extern unsigned long search_exception_table(unsigned long); + +/* + * architectures with an MMU should override these two + */ +#ifndef __copy_from_user +static inline __must_check long __copy_from_user(void *to, + const void __user * from, unsigned long n) +{ + if (__builtin_constant_p(n)) { + switch(n) { + case 1: + *(u8 *)to = *(u8 __force *)from; + return 0; + case 2: + *(u16 *)to = *(u16 __force *)from; + return 0; + case 4: + *(u32 *)to = *(u32 __force *)from; + return 0; +#ifdef CONFIG_64BIT + case 8: + *(u64 *)to = *(u64 __force *)from; + return 0; +#endif + default: + break; + } + } + + memcpy(to, (const void __force *)from, n); + return 0; +} +#endif + +#ifndef __copy_to_user +static inline __must_check long __copy_to_user(void __user *to, + const void *from, unsigned long n) +{ + if (__builtin_constant_p(n)) { + switch(n) { + case 1: + *(u8 __force *)to = *(u8 *)from; + return 0; + case 2: + *(u16 __force *)to = *(u16 *)from; + return 0; + case 4: + *(u32 __force *)to = *(u32 *)from; + return 0; +#ifdef CONFIG_64BIT + case 8: + *(u64 __force *)to = *(u64 *)from; + return 0; +#endif + default: + break; + } + } + + memcpy((void __force *)to, from, n); + return 0; +} +#endif + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * This version just falls back to copy_{from,to}_user, which should + * provide a fast-path for small values. + */ +#define __put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __x = (x); \ + int __pu_err = -EFAULT; \ + __chk_user_ptr(ptr); \ + switch (sizeof (*(ptr))) { \ + case 1: \ + case 2: \ + case 4: \ + case 8: \ + __pu_err = __put_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + break; \ + default: \ + __put_user_bad(); \ + break; \ + } \ + __pu_err; \ +}) + +#define put_user(x, ptr) \ +({ \ + might_sleep(); \ + __access_ok(ptr, sizeof (*ptr)) ? \ + __put_user(x, ptr) : \ + -EFAULT; \ +}) + +static inline int __put_user_fn(size_t size, void __user *ptr, void *x) +{ + size = __copy_to_user(ptr, x, size); + return size ? -EFAULT : size; +} + +extern int __put_user_bad(void) __attribute__((noreturn)); + +#define __get_user(x, ptr) \ +({ \ + int __gu_err = -EFAULT; \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: { \ + unsigned char __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 2: { \ + unsigned short __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 4: { \ + unsigned int __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 8: { \ + unsigned long long __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + default: \ + __get_user_bad(); \ + break; \ + } \ + __gu_err; \ +}) + +#define get_user(x, ptr) \ +({ \ + might_sleep(); \ + __access_ok(ptr, sizeof (*ptr)) ? \ + __get_user(x, ptr) : \ + -EFAULT; \ +}) + +static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) +{ + size = __copy_from_user(x, ptr, size); + return size ? -EFAULT : size; +} + +extern int __get_user_bad(void) __attribute__((noreturn)); + +#ifndef __copy_from_user_inatomic +#define __copy_from_user_inatomic __copy_from_user +#endif + +#ifndef __copy_to_user_inatomic +#define __copy_to_user_inatomic __copy_to_user +#endif + +static inline long copy_from_user(void *to, + const void __user * from, unsigned long n) +{ + might_sleep(); + if (__access_ok(from, n)) + return __copy_from_user(to, from, n); + else + return n; +} + +static inline long copy_to_user(void __user *to, + const void *from, unsigned long n) +{ + might_sleep(); + if (__access_ok(to, n)) + return __copy_to_user(to, from, n); + else + return n; +} + +/* + * Copy a null terminated string from userspace. + */ +#ifndef __strncpy_from_user +static inline long +__strncpy_from_user(char *dst, const char __user *src, long count) +{ + char *tmp; + strncpy(dst, (const char __force *)src, count); + for (tmp = dst; *tmp && count > 0; tmp++, count--) + ; + return (tmp - dst); +} +#endif + +static inline long +strncpy_from_user(char *dst, const char __user *src, long count) +{ + if (!__access_ok(src, 1)) + return -EFAULT; + return __strncpy_from_user(dst, src, count); +} + +/* + * Return the size of a string (including the ending 0) + * + * Return 0 on exception, a value greater than N if too long + */ +#ifndef strnlen_user +static inline long strnlen_user(const char __user *src, long n) +{ + return strlen((void * __force)src) + 1; +} +#endif + +static inline long strlen_user(const char __user *src) +{ + return strnlen_user(src, 32767); +} + +/* + * Zero Userspace + */ +#ifndef __clear_user +static inline __must_check unsigned long +__clear_user(void __user *to, unsigned long n) +{ + memset((void __force *)to, 0, n); + return 0; +} +#endif + +static inline __must_check unsigned long +clear_user(void __user *to, unsigned long n) +{ + might_sleep(); + if (!__access_ok(to, n)) + return n; + + return __clear_user(to, n); +} + +#endif /* __ASM_GENERIC_UACCESS_H */ -- cgit v1.2.3 From 26a28fa4fea5b8c65713aa50c124f76a88c7924d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:38 +0000 Subject: add generic lib/checksum.c Add a generic (unoptimized) implementation of checksum.c in pure C for use by all architectures that cannot be bother with implementing their own version. Based on microblaze code by Michal Simek Cc: Michal Simek Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/checksum.h | 79 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 include/asm-generic/checksum.h (limited to 'include') diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h new file mode 100644 index 000000000000..4647c762d970 --- /dev/null +++ b/include/asm-generic/checksum.h @@ -0,0 +1,79 @@ +#ifndef __ASM_GENERIC_CHECKSUM_H +#define __ASM_GENERIC_CHECKSUM_H + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +extern __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum); + +/* + * the same as csum_partial_copy, but copies from user space. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *csum_err); + +#define csum_partial_copy_nocheck(src, dst, len, sum) \ + csum_partial_copy((src), (dst), (len), (sum)) + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); + +/* + * Fold a partial checksum + */ +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +#ifndef csum_tcpudp_nofold +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +extern __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum); +#endif + +static inline __sum16 +csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +extern __sum16 ip_compute_csum(const void *buff, int len); + +#endif /* __ASM_GENERIC_CHECKSUM_H */ -- cgit v1.2.3 From 73422811d290c628b4ddbf6830e5cd6fa42e84f1 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sun, 10 May 2009 16:05:39 -0400 Subject: reiserfs: allow exposing privroot w/ xattrs enabled This patch adds an -oexpose_privroot option to allow access to the privroot. Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- include/linux/reiserfs_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 6473650c28f1..dab68bbed675 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -453,6 +453,7 @@ enum reiserfs_mount_options { REISERFS_ATTRS, REISERFS_XATTRS_USER, REISERFS_POSIXACL, + REISERFS_EXPOSE_PRIVROOT, REISERFS_BARRIER_NONE, REISERFS_BARRIER_FLUSH, @@ -490,6 +491,7 @@ enum reiserfs_mount_options { #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) +#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT)) #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) #define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE)) #define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH)) -- cgit v1.2.3 From 2a737871108de9ba8930f7650d549f1383767f8b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2009 11:49:53 -0400 Subject: Cache root in nameidata New field: nd->root. When pathname resolution wants to know the root, check if nd->root.mnt is non-NULL; use nd->root if it is, otherwise copy current->fs->root there. After path_walk() is finished, we check if we'd got a cached value in nd->root and drop it. Before calling path_walk() we should either set nd->root.mnt to NULL *or* copy (and pin down) some path to nd->root. In the latter case we won't be looking at current->fs->root at all. Signed-off-by: Al Viro --- include/linux/namei.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index 518098fe63af..325dd3ad39a0 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -18,6 +18,7 @@ enum { MAX_NESTED_LINKS = 8 }; struct nameidata { struct path path; struct qstr last; + struct path root; unsigned int flags; int last_type; unsigned depth; -- cgit v1.2.3 From 91c9fa8f75877c0c1e455c23e8f8206c91c8f77f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 02:42:05 -0400 Subject: switch rqst_exp_get_by_name() Signed-off-by: Al Viro --- include/linux/nfsd/export.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index bcd0201589f8..98f6fd584d53 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -125,8 +125,7 @@ void nfsd_export_flush(void); void exp_readlock(void); void exp_readunlock(void); struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, - struct vfsmount *, - struct dentry *); + struct path *); struct svc_export * rqst_exp_parent(struct svc_rqst *, struct vfsmount *mnt, struct dentry *dentry); -- cgit v1.2.3 From e64c390ca0b60fd2119331ef1fa888d7ea27e424 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 03:00:46 -0400 Subject: switch rqst_exp_parent() Signed-off-by: Al Viro --- include/linux/nfsd/export.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 98f6fd584d53..a6d9ef2bb34a 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -127,8 +127,7 @@ void exp_readunlock(void); struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, struct path *); struct svc_export * rqst_exp_parent(struct svc_rqst *, - struct vfsmount *mnt, - struct dentry *dentry); + struct path *); int exp_rootfh(struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); -- cgit v1.2.3 From bab77ebf51e3902f608ecf08c9d34a0a52ac35a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 03:26:48 -0400 Subject: switch follow_up() to struct path Signed-off-by: Al Viro --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index 325dd3ad39a0..9cd5a717be3b 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -79,7 +79,7 @@ extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_noperm(const char *, struct dentry *); extern int follow_down(struct vfsmount **, struct dentry **); -extern int follow_up(struct vfsmount **, struct dentry **); +extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); -- cgit v1.2.3 From 589ff870ed60a9ebdd5ec99ec3f5afe1282fe151 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 03:28:19 -0400 Subject: Switch collect_mounts() to struct path Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 323b5ce474c1..03fb2102b8f3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1800,7 +1800,7 @@ extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); extern long do_mount(char *, char *, char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *); +extern struct vfsmount *collect_mounts(struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int vfs_statfs(struct dentry *, struct kstatfs *); -- cgit v1.2.3 From 9393bd07cf218ca51d0e627653f906a9d76a9131 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 13:58:15 -0400 Subject: switch follow_down() Signed-off-by: Al Viro --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index 9cd5a717be3b..d870ae2faedc 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -78,7 +78,7 @@ extern void release_open_intent(struct nameidata *); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_noperm(const char *, struct dentry *); -extern int follow_down(struct vfsmount **, struct dentry **); +extern int follow_down(struct path *); extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); -- cgit v1.2.3 From 1c755af4df75996b0dd4b7e6cacaf9d57a6ef2ef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 14:06:57 -0400 Subject: switch lookup_mnt() Signed-off-by: Al Viro --- include/linux/dcache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 97978004338d..72ce2ae88591 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -370,7 +370,7 @@ static inline int d_mountpoint(struct dentry *dentry) return dentry->d_mounted; } -extern struct vfsmount *lookup_mnt(struct vfsmount *, struct dentry *); +extern struct vfsmount *lookup_mnt(struct path *); extern struct dentry *lookup_create(struct nameidata *nd, int is_dir); extern int sysctl_vfs_cache_pressure; -- cgit v1.2.3 From 3174c21b74b56c6a53fddd41a30fd6f757a32bd0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2009 13:19:18 -0400 Subject: Move junk from proc_fs.h to fs/proc/internal.h Signed-off-by: Al Viro --- include/linux/proc_fs.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index fbfa3d44d33d..e6e77d31c418 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -93,20 +93,9 @@ struct vmcore { #ifdef CONFIG_PROC_FS -extern spinlock_t proc_subdir_lock; - extern void proc_root_init(void); void proc_flush_task(struct task_struct *task); -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); -int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); -unsigned long task_vsize(struct mm_struct *); -int task_statm(struct mm_struct *, int *, int *, int *, int *); -void task_mem(struct seq_file *, struct mm_struct *); -void clear_refs_smap(struct mm_struct *mm); - -struct proc_dir_entry *de_get(struct proc_dir_entry *de); -void de_put(struct proc_dir_entry *de); extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent); @@ -116,20 +105,7 @@ struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, void *data); extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); -extern struct vfsmount *proc_mnt; struct pid_namespace; -extern int proc_fill_super(struct super_block *); -extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); - -/* - * These are generic /proc routines that use the internal - * "struct proc_dir_entry" tree to traverse the filesystem. - * - * The /proc root directory has extended versions to take care - * of the /proc/ subdirectories. - */ -extern int proc_readdir(struct file *, void *, filldir_t); -extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); extern int pid_ns_prepare_proc(struct pid_namespace *ns); extern void pid_ns_release_proc(struct pid_namespace *ns); -- cgit v1.2.3 From d3ef3d7351ccfbef3e5d926efc5ee332136f40d4 Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Sun, 26 Apr 2009 20:25:54 +1000 Subject: fs: mnt_want_write speedup This patch speeds up lmbench lat_mmap test by about 8%. lat_mmap is set up basically to mmap a 64MB file on tmpfs, fault in its pages, then unmap it. A microbenchmark yes, but it exercises some important paths in the mm. Before: avg = 501.9 std = 14.7773 After: avg = 462.286 std = 5.46106 (50 runs of each, stddev gives a reasonable confidence, but there is quite a bit of variation there still) It does this by removing the complex per-cpu locking and counter-cache and replaces it with a percpu counter in struct vfsmount. This makes the code much simpler, and avoids spinlocks (although the msync is still pretty costly, unfortunately). It results in about 900 bytes smaller code too. It does increase the size of a vfsmount, however. It should also give a speedup on large systems if CPUs are frequently operating on different mounts (because the existing scheme has to operate on an atomic in the struct vfsmount when switching between mounts). But I'm most interested in the single threaded path performance for the moment. [AV: minor cleanup] Cc: Dave Hansen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/mount.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 51f55f903aff..ac49c1f8e5c0 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -30,7 +30,7 @@ struct mnt_namespace; #define MNT_STRICTATIME 0x80 #define MNT_SHRINKABLE 0x100 -#define MNT_IMBALANCED_WRITE_COUNT 0x200 /* just for debugging */ +#define MNT_WRITE_HOLD 0x200 #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ @@ -65,13 +65,22 @@ struct vfsmount { int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; - /* - * This value is not stable unless all of the mnt_writers[] spinlocks - * are held, and all mnt_writer[]s on this mount have 0 as their ->count - */ - atomic_t __mnt_writers; +#ifdef CONFIG_SMP + int *mnt_writers; +#else + int mnt_writers; +#endif }; +static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + return mnt->mnt_writers; +#else + return &mnt->mnt_writers; +#endif +} + static inline struct vfsmount *mntget(struct vfsmount *mnt) { if (mnt) -- cgit v1.2.3 From 96029c4e09ccbd73a6d0ed2b29e80bf2586ad7ef Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Sun, 26 Apr 2009 20:25:55 +1000 Subject: fs: introduce mnt_clone_write This patch speeds up lmbench lat_mmap test by about another 2% after the first patch. Before: avg = 462.286 std = 5.46106 After: avg = 453.12 std = 9.58257 (50 runs of each, stddev gives a reasonable confidence) It does this by introducing mnt_clone_write, which avoids some heavyweight operations of mnt_want_write if called on a vfsmount which we know already has a write count; and mnt_want_write_file, which can call mnt_clone_write if the file is open for write. After these two patches, mnt_want_write and mnt_drop_write go from 7% on the profile down to 1.3% (including mnt_clone_write). [AV: mnt_want_write_file() should take file alone and derive mnt from it; not only all callers have that form, but that's the only mnt about which we know that it's already held for write if file is opened for write] Cc: Dave Hansen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/mount.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index ac49c1f8e5c0..5d5275364867 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -88,7 +88,11 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt) return mnt; } +struct file; /* forward dec */ + extern int mnt_want_write(struct vfsmount *mnt); +extern int mnt_want_write_file(struct file *file); +extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); extern void mntput_no_expire(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); -- cgit v1.2.3 From 876a9f76abbcb775f8d21cbc99fa161f9e5937f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Apr 2009 18:05:55 +0200 Subject: remove s_async_list Remove the unused s_async_list in the superblock, a leftover of the broken async inode deletion code that leaked into mainline. Having this in the middle of the sync/unmount path is not helpful for the following cleanups. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 03fb2102b8f3..36bcff7036ef 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1372,11 +1372,6 @@ struct super_block { * generic_show_options() */ char *s_options; - - /* - * storage for asynchronous operations - */ - struct list_head s_async_list; }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v1.2.3 From 429479f031322a0cc5c921ffb2321a51718dc875 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:50 +0200 Subject: vfs: Make __fsync_super() a static function (version 4) __fsync_super() does the same thing as fsync_super(). So change the only caller to use fsync_super() and make __fsync_super() static. This removes unnecessarily duplicated call to sync_blockdev() and prepares ground for the changes to __fsync_super() in the following patches. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 36bcff7036ef..41a9907f342e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2078,7 +2078,6 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern void sync_supers(void); extern void sync_filesystems(int wait); -extern void __fsync_super(struct super_block *sb); extern void emergency_sync(void); extern void emergency_remount(void); extern int do_remount_sb(struct super_block *sb, int flags, -- cgit v1.2.3 From 5cee5815d1564bbbd505fea86f4550f1efdb5cd0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:51 +0200 Subject: vfs: Make sys_sync() use fsync_super() (version 4) It is unnecessarily fragile to have two places (fsync_super() and do_sync()) doing data integrity sync of the filesystem. Alter __fsync_super() to accommodate needs of both callers and use it. So after this patch __fsync_super() is the only place where we gather all the calls needed to properly send all data on a filesystem to disk. Nice bonus is that we get a complete livelock avoidance and write_supers() is now only used for periodic writeback of superblocks. sync_blockdevs() introduced a couple of patches ago is gone now. [build fixes folded] Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- include/linux/writeback.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 41a9907f342e..f00df653cf2b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1321,7 +1321,7 @@ struct super_block { struct rw_semaphore s_umount; struct mutex s_lock; int s_count; - int s_need_sync_fs; + int s_need_sync; atomic_t s_active; #ifdef CONFIG_SECURITY void *s_security; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 93445477f86a..3224820c8514 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -79,7 +79,6 @@ struct writeback_control { void writeback_inodes(struct writeback_control *wbc); int inode_wait(void *); void sync_inodes_sb(struct super_block *, int wait); -void sync_inodes(int wait); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) -- cgit v1.2.3 From c15c54f5f056ee4819da9fde59a5f2cd45445f23 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:52 +0200 Subject: vfs: Move syncing code from super.c to sync.c (version 4) Move sync_filesystems(), __fsync_super(), fsync_super() from super.c to sync.c where it fits better. [build fixes folded] Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index f00df653cf2b..d3f7159993cf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1942,7 +1942,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -extern int fsync_super(struct super_block *); extern int fsync_no_super(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} @@ -1959,6 +1958,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) return 0; } #endif +extern int fsync_super(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; extern const struct file_operations bad_sock_fops; @@ -2077,7 +2077,6 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern void sync_supers(void); -extern void sync_filesystems(int wait); extern void emergency_sync(void); extern void emergency_remount(void); extern int do_remount_sb(struct super_block *sb, int flags, -- cgit v1.2.3 From 60b0680fa236ac4e17ce31a50048c9d75f9ec831 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:53 +0200 Subject: vfs: Rename fsync_super() to sync_filesystem() (version 4) Rename the function so that it better describe what it really does. Also remove the unnecessary include of buffer_head.h. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d3f7159993cf..fb1822bed7c8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1958,7 +1958,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) return 0; } #endif -extern int fsync_super(struct super_block *); +extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; extern const struct file_operations bad_sock_fops; -- cgit v1.2.3 From 850b201b087f5525a0a7278551c2bcd0423c3b26 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 27 Apr 2009 16:43:54 +0200 Subject: quota: cleanup dquota sync functions (version 4) Currently the VFS calls vfs_dq_sync to sync out disk quotas for a given superblock. This is a small wrapper around sync_dquots which for the case of a non-NULL superblock is a small wrapper around quota_sync_sb. Just make quota_sync_sb global (rename it to sync_quota_sb) and call it directly. Also call it directly for those cases in quota.c that have a superblock and leave sync_dquots purely an iterator over sync_quota_sb and remove it's superblock argument. To make this nicer move the check for the lack of a quota_sync method from the callers into sync_quota_sb. [folded build fix from Alexander Beregalov ] Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/quotaops.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 36353d95c8db..047310fa22fb 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -20,7 +20,7 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) /* * declaration of quota_function calls in kernel. */ -void sync_dquots(struct super_block *sb, int type); +void sync_quota_sb(struct super_block *sb, int type); int dquot_initialize(struct inode *inode, int type); int dquot_drop(struct inode *inode); @@ -253,12 +253,7 @@ static inline void vfs_dq_free_inode(struct inode *inode) inode->i_sb->dq_op->free_inode(inode, 1); } -/* The following two functions cannot be called inside a transaction */ -static inline void vfs_dq_sync(struct super_block *sb) -{ - sync_dquots(sb, -1); -} - +/* Cannot be called inside a transaction */ static inline int vfs_dq_off(struct super_block *sb, int remount) { int ret = -ENOSYS; @@ -334,7 +329,7 @@ static inline void vfs_dq_free_inode(struct inode *inode) { } -static inline void vfs_dq_sync(struct super_block *sb) +static inline void sync_quota_sb(struct super_block *sb, int type) { } -- cgit v1.2.3 From c3f8a40c1cd5591b882497d1d00d43d0e5bb4698 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:55 +0200 Subject: quota: Introduce writeout_quota_sb() (version 4) Introduce this function which just writes all the quota structures but avoids all the syncing and cache pruning work to expose quota structures to userspace. Use this function from __sync_filesystem when wait == 0. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/quotaops.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 047310fa22fb..7bc457593684 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -21,6 +21,11 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) * declaration of quota_function calls in kernel. */ void sync_quota_sb(struct super_block *sb, int type); +static inline void writeout_quota_sb(struct super_block *sb, int type) +{ + if (sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, type); +} int dquot_initialize(struct inode *inode, int type); int dquot_drop(struct inode *inode); @@ -333,6 +338,10 @@ static inline void sync_quota_sb(struct super_block *sb, int type) { } +static inline void writeout_quota_sb(struct super_block *sb, int type) +{ +} + static inline int vfs_dq_off(struct super_block *sb, int remount) { return 0; -- cgit v1.2.3 From f3da392e9ff14b9f388e74319e6d195848991c07 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 4 May 2009 03:32:03 +0400 Subject: dcache: extrace and use d_unlinked() d_unlinked() will be used in middle-term to ban checkpointing when opened but unlinked file is detected, and in long term, to detect such situation and special case on it. Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro --- include/linux/dcache.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 72ce2ae88591..30b93b2a01a4 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -353,6 +353,11 @@ static inline int d_unhashed(struct dentry *dentry) return (dentry->d_flags & DCACHE_UNHASHED); } +static inline int d_unlinked(struct dentry *dentry) +{ + return d_unhashed(dentry) && !IS_ROOT(dentry); +} + static inline struct dentry *dget_parent(struct dentry *dentry) { struct dentry *ret; -- cgit v1.2.3 From 62c6943b4b1e818aea60c11c5a68a50785b83119 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 May 2009 03:12:29 -0400 Subject: Trim a bit of crap from fs.h do_remount_sb() is fs/internal.h fodder, fsync_no_super() is long gone. Signed-off-by: Al Viro --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fb1822bed7c8..e7833ef5d1d6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1942,7 +1942,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -extern int fsync_no_super(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } @@ -2079,8 +2078,6 @@ extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern void sync_supers(void); extern void emergency_sync(void); extern void emergency_remount(void); -extern int do_remount_sb(struct super_block *sb, int flags, - void *data, int force); #ifdef CONFIG_BLOCK extern sector_t bmap(struct inode *, sector_t); #endif -- cgit v1.2.3 From 9fd5746fd3d7838bf6ff991d50f1257057d1156f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 21 May 2009 16:01:00 -0400 Subject: fs: Remove i_cindex from struct inode The only user of the i_cindex element in the inode structure is used is by the firewire drivers. As part of an attempt to slim down the inode structure to save memory --- since a typical Linux system will have hundreds of thousands if not millions of inodes cached, a reduction in the size inode has high leverage. The firewire driver does not need i_cindex in any fast path, so it's simple enough to calculate when it is needed, instead of wasting space in the inode structure. Signed-off-by: "Theodore Ts'o" Cc: krh@redhat.com Cc: stefanr@s5r6.in-berlin.de Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Al Viro --- include/linux/cdev.h | 2 ++ include/linux/fs.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cdev.h b/include/linux/cdev.h index fb4591977b03..f389e319a454 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -28,6 +28,8 @@ int cdev_add(struct cdev *, dev_t, unsigned); void cdev_del(struct cdev *); +int cdev_index(struct inode *inode); + void cd_forget(struct inode *); extern struct backing_dev_info directly_mappable_cdev_bdi; diff --git a/include/linux/fs.h b/include/linux/fs.h index e7833ef5d1d6..bcd63706db87 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -751,7 +751,6 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; }; - int i_cindex; __u32 i_generation; -- cgit v1.2.3 From 28ad0c118b0ed98b042d362acfe0017591921138 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 21 May 2009 16:01:02 -0400 Subject: fs: Rearrange inode structure elements to avoid waste due to padding Signed-off-by: "Theodore Ts'o" Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bcd63706db87..d883aa1fc2eb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -729,8 +729,8 @@ struct inode { struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; - unsigned int i_blkbits; blkcnt_t i_blocks; + unsigned int i_blkbits; unsigned short i_bytes; umode_t i_mode; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ -- cgit v1.2.3 From 8688b8635266cf98f00c6b0350ea2dbe7c42c321 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 26 May 2009 05:45:04 -0400 Subject: linux/magic.h: move cramfs magic out of cramfs_fs.h Signed-off-by: Mike Frysinger CC: Alexander Viro Signed-off-by: Al Viro --- include/linux/cramfs_fs.h | 3 +-- include/linux/magic.h | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cramfs_fs.h b/include/linux/cramfs_fs.h index 3be4e5a27d82..6fc2bed368b8 100644 --- a/include/linux/cramfs_fs.h +++ b/include/linux/cramfs_fs.h @@ -2,9 +2,8 @@ #define __CRAMFS_H #include +#include -#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ -#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ #define CRAMFS_SIGNATURE "Compressed ROMFS" /* diff --git a/include/linux/magic.h b/include/linux/magic.h index 927138cf3050..1923327b9869 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -6,6 +6,8 @@ #define AFS_SUPER_MAGIC 0x5346414F #define AUTOFS_SUPER_MAGIC 0x0187 #define CODA_SUPER_MAGIC 0x73757245 +#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ +#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ #define DEBUGFS_MAGIC 0x64626720 #define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 -- cgit v1.2.3 From d5aacad548db1ff547adf35d0a77eb2a8ed4fe14 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Jun 2009 14:56:44 -0400 Subject: New helper - simple_fsync() writes associated buffers, then does sync_inode() to write the inode itself (and to make it clean). Depends on ->write_inode() honouring the second argument. Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d883aa1fc2eb..ede84fa7da5d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2345,6 +2345,8 @@ extern void simple_release_fs(struct vfsmount **mount, int *count); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); +extern int simple_fsync(struct file *, struct dentry *, int); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *); -- cgit v1.2.3 From 79d25767583e4e086f8309bfd1f502660a64fe7f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Jun 2009 09:30:08 -0400 Subject: Sanitize qnx4 fsync handling * have directory operations use mark_buffer_dirty_inode(), so that sync_mapping_buffers() would get those. * make qnx4_write_inode() honour its last argument. * get rid of insane copies of very ancient "walk the indirect blocks" in qnx4/fsync - they never matched the actual fs layout and, fortunately, never'd been called. Again, all this junk is not needed; ->fsync() should just do sync_mapping_buffers + sync_inode (and if we implement block allocation for qnx4, we'll need to use mark_buffer_dirty_inode() for extent blocks) Signed-off-by: Al Viro --- include/linux/qnx4_fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index 787d19ea9f46..acbaec3524e0 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -126,8 +126,6 @@ extern void qnx4_truncate(struct inode *inode); extern void qnx4_free_inode(struct inode *inode); extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); -extern int qnx4_sync_file(struct file *file, struct dentry *dentry, int); -extern int qnx4_sync_inode(struct inode *inode); static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) { -- cgit v1.2.3 From 964f5369667b342994fe3f384e9ba41d404ee796 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Jun 2009 09:47:13 -0400 Subject: fs/qnx4: sanitize includes fs-internal parts of qnx4_fs.h taken to fs/qnx4/qnx4.h, includes adjusted, qnx4_fs.h doesn't need unifdef anymore. Signed-off-by: Al Viro --- include/linux/Kbuild | 2 +- include/linux/qnx4_fs.h | 59 ------------------------------------------------- 2 files changed, 1 insertion(+), 60 deletions(-) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 3f0eaa397ef5..b3afd2219ad2 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -135,6 +135,7 @@ header-y += posix_types.h header-y += ppdev.h header-y += prctl.h header-y += qnxtypes.h +header-y += qnx4_fs.h header-y += radeonfb.h header-y += raw.h header-y += resource.h @@ -308,7 +309,6 @@ unifdef-y += poll.h unifdef-y += ppp_defs.h unifdef-y += ppp-comp.h unifdef-y += ptrace.h -unifdef-y += qnx4_fs.h unifdef-y += quota.h unifdef-y += random.h unifdef-y += irqnr.h diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index acbaec3524e0..8b9aee1a9ce3 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -85,63 +85,4 @@ struct qnx4_super_block { struct qnx4_inode_entry AltBoot; }; -#ifdef __KERNEL__ - -#define QNX4_DEBUG 0 - -#if QNX4_DEBUG -#define QNX4DEBUG(X) printk X -#else -#define QNX4DEBUG(X) (void) 0 -#endif - -struct qnx4_sb_info { - struct buffer_head *sb_buf; /* superblock buffer */ - struct qnx4_super_block *sb; /* our superblock */ - unsigned int Version; /* may be useful */ - struct qnx4_inode_entry *BitMap; /* useful */ -}; - -struct qnx4_inode_info { - struct qnx4_inode_entry raw; - loff_t mmu_private; - struct inode vfs_inode; -}; - -extern struct inode *qnx4_iget(struct super_block *, unsigned long); -extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); -extern unsigned long qnx4_count_free_blocks(struct super_block *sb); -extern unsigned long qnx4_block_map(struct inode *inode, long iblock); - -extern struct buffer_head *qnx4_bread(struct inode *, int, int); - -extern const struct inode_operations qnx4_file_inode_operations; -extern const struct inode_operations qnx4_dir_inode_operations; -extern const struct file_operations qnx4_file_operations; -extern const struct file_operations qnx4_dir_operations; -extern int qnx4_is_free(struct super_block *sb, long block); -extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); -extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); -extern void qnx4_truncate(struct inode *inode); -extern void qnx4_free_inode(struct inode *inode); -extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); -extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); - -static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct qnx4_inode_info *qnx4_i(struct inode *inode) -{ - return container_of(inode, struct qnx4_inode_info, vfs_inode); -} - -static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode) -{ - return &qnx4_i(inode)->raw; -} - -#endif /* __KERNEL__ */ - #endif -- cgit v1.2.3 From fbe0efb869efde8d847ede3a925230ef88910086 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= Date: Tue, 9 Jun 2009 01:50:41 +1000 Subject: drm_calloc_large: check right size, check integer overflow, use GFP_ZERO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously we would check size instead of size * nmemb, and so would never hit the vmalloc path. Also add integer overflow check as in kcalloc, and allocate GFP_ZERO pages instead of memset()ing them. Signed-off-by: Kristian Høgsberg Signed-off-by: Dave Airlie --- include/drm/drmP.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index afc21685230e..1cc51a0812fe 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1573,18 +1573,14 @@ static __inline__ void *drm_calloc(size_t nmemb, size_t size, int area) static __inline__ void *drm_calloc_large(size_t nmemb, size_t size) { - u8 *addr; - - if (size <= PAGE_SIZE) + if (size * nmemb <= PAGE_SIZE) return kcalloc(nmemb, size, GFP_KERNEL); - addr = vmalloc(nmemb * size); - if (!addr) + if (size != 0 && nmemb > ULONG_MAX / size) return NULL; - memset(addr, 0, nmemb * size); - - return addr; + return __vmalloc(size * nmemb, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); } static __inline void drm_free_large(void *ptr) -- cgit v1.2.3 From 2a71ebcd85bcc4d6607f577f23a491f796c30e82 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jun 2009 15:53:10 +1000 Subject: drm/radeon: add rv740 drm support. This adds drm support for the RV740 family of chips to the r600 support code. Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 8b4c80cdd277..c7a1a8dc5ea4 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -254,6 +254,11 @@ {0x1002, 0x940A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \ {0x1002, 0x940B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \ {0x1002, 0x940F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94B1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94B3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94B5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9440, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9441, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9442, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ -- cgit v1.2.3 From 715cbb05c935e8a4306a730d14a72d5af881523e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jun 2009 15:55:44 +1000 Subject: drm/radeon: add support for RV790. This adds the PCI IDs for the rv790 which are equiv to the rv770. Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index c7a1a8dc5ea4..f8634ab53b8f 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -273,6 +273,8 @@ {0x1002, 0x9456, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x945A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x945B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x9460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x9462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x946A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x946B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x947A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ -- cgit v1.2.3 From 249d6048ca98b5452105b0824abac1275661b8e3 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 8 Apr 2009 17:11:16 +0200 Subject: drm: Split out the mm declarations in a separate header. Add atomic operations. this is a TTM preparation patch, it rearranges the mm and add operations needed to do mm operations in atomic context. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- include/drm/drmP.h | 37 +-------------------- include/drm/drm_mm.h | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 36 deletions(-) create mode 100644 include/drm/drm_mm.h (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 1cc51a0812fe..d4ddc22e46bb 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -86,6 +86,7 @@ struct drm_device; #include "drm_os_linux.h" #include "drm_hashtab.h" +#include "drm_mm.h" #define DRM_UT_CORE 0x01 #define DRM_UT_DRIVER 0x02 @@ -553,26 +554,6 @@ struct drm_sigdata { }; -/* - * Generic memory manager structs - */ - -struct drm_mm_node { - struct list_head fl_entry; - struct list_head ml_entry; - int free; - unsigned long start; - unsigned long size; - struct drm_mm *mm; - void *private; -}; - -struct drm_mm { - struct list_head fl_entry; - struct list_head ml_entry; -}; - - /** * Kernel side of a mapping */ @@ -1436,22 +1417,6 @@ extern char *drm_get_connector_status_name(enum drm_connector_status status); extern int drm_sysfs_connector_add(struct drm_connector *connector); extern void drm_sysfs_connector_remove(struct drm_connector *connector); -/* - * Basic memory manager support (drm_mm.c) - */ -extern struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent, - unsigned long size, - unsigned alignment); -extern void drm_mm_put_block(struct drm_mm_node * cur); -extern struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm, unsigned long size, - unsigned alignment, int best_match); -extern int drm_mm_init(struct drm_mm *mm, unsigned long start, unsigned long size); -extern void drm_mm_takedown(struct drm_mm *mm); -extern int drm_mm_clean(struct drm_mm *mm); -extern unsigned long drm_mm_tail_space(struct drm_mm *mm); -extern int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size); -extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size); - /* Graphics Execution Manager library functions (drm_gem.c) */ int drm_gem_init(struct drm_device *dev); void drm_gem_destroy(struct drm_device *dev); diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h new file mode 100644 index 000000000000..5662f4278ef3 --- /dev/null +++ b/include/drm/drm_mm.h @@ -0,0 +1,90 @@ +/************************************************************************** + * + * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX. USA. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + **************************************************************************/ +/* + * Authors: + * Thomas Hellstrom + */ + +#ifndef _DRM_MM_H_ +#define _DRM_MM_H_ + +/* + * Generic range manager structs + */ +#include + +struct drm_mm_node { + struct list_head fl_entry; + struct list_head ml_entry; + int free; + unsigned long start; + unsigned long size; + struct drm_mm *mm; + void *private; +}; + +struct drm_mm { + struct list_head fl_entry; + struct list_head ml_entry; + struct list_head unused_nodes; + int num_unused; + spinlock_t unused_lock; +}; + +/* + * Basic range manager support (drm_mm.c) + */ + +extern struct drm_mm_node *drm_mm_get_block(struct drm_mm_node *parent, + unsigned long size, + unsigned alignment); +extern struct drm_mm_node *drm_mm_get_block_atomic(struct drm_mm_node *parent, + unsigned long size, + unsigned alignment); +extern void drm_mm_put_block(struct drm_mm_node *cur); +extern struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm, + unsigned long size, + unsigned alignment, + int best_match); +extern int drm_mm_init(struct drm_mm *mm, unsigned long start, + unsigned long size); +extern void drm_mm_takedown(struct drm_mm *mm); +extern int drm_mm_clean(struct drm_mm *mm); +extern unsigned long drm_mm_tail_space(struct drm_mm *mm); +extern int drm_mm_remove_space_from_tail(struct drm_mm *mm, + unsigned long size); +extern int drm_mm_add_space_to_tail(struct drm_mm *mm, + unsigned long size, int atomic); +extern int drm_mm_pre_get(struct drm_mm *mm); + +static inline struct drm_mm *drm_get_mm(struct drm_mm_node *block) +{ + return block->mm; +} + +#endif -- cgit v1.2.3 From 3c24475c1e4e8d10e50df161d8c4f1d382997a7c Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 8 Apr 2009 18:34:28 +0200 Subject: drm: include kernel list header file in hashtab header Signed-off-by: Dave Airlie --- include/drm/drm_hashtab.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_hashtab.h b/include/drm/drm_hashtab.h index cd2b189e1be6..0af087a4d3b3 100644 --- a/include/drm/drm_hashtab.h +++ b/include/drm/drm_hashtab.h @@ -35,6 +35,8 @@ #ifndef DRM_HASHTAB_H #define DRM_HASHTAB_H +#include + #define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member) struct drm_hash_item { -- cgit v1.2.3 From ca371c0d7e23d0d0afae65fc83a0e91cf7399573 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 12 Jun 2009 10:33:53 +0300 Subject: memcg: fix page_cgroup fatal error in FLATMEM Now, SLAB is configured in very early stage and it can be used in init routine now. But replacing alloc_bootmem() in FLAT/DISCONTIGMEM's page_cgroup() initialization breaks the allocation, now. (Works well in SPARSEMEM case...it supports MEMORY_HOTPLUG and size of page_cgroup is in reasonable size (< 1 << MAX_ORDER.) This patch revive FLATMEM+memory cgroup by using alloc_bootmem. In future, We stop to support FLATMEM (if no users) or rewrite codes for flatmem completely.But this will adds more messy codes and overheads. Reported-by: Li Zefan Tested-by: Li Zefan Tested-by: Ingo Molnar Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Pekka Enberg --- include/linux/page_cgroup.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 7339c7bf7331..13f126c89ae8 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -18,7 +18,19 @@ struct page_cgroup { }; void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat); -void __init page_cgroup_init(void); + +#ifdef CONFIG_SPARSEMEM +static inline void __init page_cgroup_init_flatmem(void) +{ +} +extern void __init page_cgroup_init(void); +#else +void __init page_cgroup_init_flatmem(void); +static inline void __init page_cgroup_init(void) +{ +} +#endif + struct page_cgroup *lookup_page_cgroup(struct page *page); enum { @@ -87,6 +99,10 @@ static inline void page_cgroup_init(void) { } +static inline void __init page_cgroup_init_flatmem(void) +{ +} + #endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP -- cgit v1.2.3 From 9a71af2c3627b379b7c31917a7f6ee0d29bc559b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:46:53 -0600 Subject: module_param: invbool should take a 'bool', not an 'int' It takes an 'int' for historical reasons, and there are only two users: simply switch it over to bool. The other user (uvesafb.c) will get a (harmless-on-x86) warning until the next patch is applied. Cc: Brad Douglas Cc: Michal Januszewski Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index a4f0b931846c..9bbca8e8c19f 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -192,7 +192,7 @@ extern int param_get_bool(char *buffer, struct kernel_param *kp); extern int param_set_invbool(const char *val, struct kernel_param *kp); extern int param_get_invbool(char *buffer, struct kernel_param *kp); -#define param_check_invbool(name, p) __param_check(name, p, int) +#define param_check_invbool(name, p) __param_check(name, p, bool) /* Comma-separated array: *nump is set to number they actually specified. */ #define module_param_array_named(name, array, type, nump, perm) \ -- cgit v1.2.3 From 45fcc70c0b6ee0c508e1fdb5fef735c3546803f4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:46:56 -0600 Subject: module_param: split perm field into flags and perm Impact: cleanup Rather than hack KPARAM_KMALLOCED into the perm field, separate it out. Since the perm field was 32 bits and only needs 16, we don't add bloat. Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 9bbca8e8c19f..009a5f768768 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -36,9 +36,13 @@ typedef int (*param_set_fn)(const char *val, struct kernel_param *kp); /* Returns length written or -errno. Buffer is 4k (ie. be short!) */ typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp); +/* Flag bits for kernel_param.flags */ +#define KPARAM_KMALLOCED 1 + struct kernel_param { const char *name; - unsigned int perm; + u16 perm; + u16 flags; param_set_fn set; param_get_fn get; union { @@ -88,7 +92,7 @@ struct kparam_array static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, perm, set, get, { arg } } + = { __param_str_##name, perm, 0, set, get, { arg } } #define module_param_call(name, set, get, arg, perm) \ __module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm) -- cgit v1.2.3 From d2c123c27db841c6c11a63de9c144823d2b1ba76 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:46:56 -0600 Subject: module_param: add __same_type convenience wrapper for __builtin_types_compatible_p Impact: new API __builtin_types_compatible_p() is a little awkward to use: it takes two types rather than types or variables, and it's just damn long. (typeof(type) == type, so this works on types as well as vars). Signed-off-by: Rusty Russell --- include/linux/compiler.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 37bcb50a4d7c..04fb5135b4e1 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -261,6 +261,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __section(S) __attribute__ ((__section__(#S))) #endif +/* Are two types/vars the same type (ignoring qualifiers)? */ +#ifndef __same_type +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#endif + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.2.3 From fddd520122953550ec2c8b60e7ca0d0f0d115d97 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:46:57 -0600 Subject: module_param: allow 'bool' module_params to be bool, not just int. Impact: API cleanup For historical reasons, 'bool' parameters must be an int, not a bool. But there are around 600 users, so a conversion seems like useless churn. So we use __same_type() to distinguish, and handle both cases. Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 009a5f768768..6547c3cdbc4c 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -38,6 +38,7 @@ typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp); /* Flag bits for kernel_param.flags */ #define KPARAM_KMALLOCED 1 +#define KPARAM_ISBOOL 2 struct kernel_param { const char *name; @@ -83,7 +84,7 @@ struct kparam_array parameters. perm sets the visibility in sysfs: 000 means it's not there, read bits mean it's readable, write bits mean it's writable. */ -#define __module_param_call(prefix, name, set, get, arg, perm) \ +#define __module_param_call(prefix, name, set, get, arg, isbool, perm) \ /* Default value instead of permissions? */ \ static int __param_perm_check_##name __attribute__((unused)) = \ BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)) \ @@ -92,10 +93,13 @@ struct kparam_array static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, perm, 0, set, get, { arg } } + = { __param_str_##name, perm, isbool ? KPARAM_ISBOOL : 0, \ + set, get, { arg } } #define module_param_call(name, set, get, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm) + __module_param_call(MODULE_PARAM_PREFIX, \ + name, set, get, arg, \ + __same_type(*(arg), bool), perm) /* Helper functions: type is byte, short, ushort, int, uint, long, ulong, charp, bool or invbool, or XXX if you define param_get_XXX, @@ -124,15 +128,16 @@ struct kparam_array #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ __module_param_call("", name, param_set_##type, param_get_##type, \ - &var, perm) + &var, __same_type(var, bool), perm) #endif /* !MODULE */ /* Actually copy string: maxlen param is usually sizeof(string). */ #define module_param_string(name, string, len, perm) \ static const struct kparam_string __param_string_##name \ = { len, string }; \ - module_param_call(name, param_set_copystring, param_get_string, \ - .str = &__param_string_##name, perm); \ + __module_param_call(MODULE_PARAM_PREFIX, name, \ + param_set_copystring, param_get_string, \ + .str = &__param_string_##name, 0, perm); \ __MODULE_PARM_TYPE(name, "string") /* Called on module insert or kernel boot */ @@ -190,9 +195,16 @@ extern int param_set_charp(const char *val, struct kernel_param *kp); extern int param_get_charp(char *buffer, struct kernel_param *kp); #define param_check_charp(name, p) __param_check(name, p, char *) +/* For historical reasons "bool" parameters can be (unsigned) "int". */ extern int param_set_bool(const char *val, struct kernel_param *kp); extern int param_get_bool(char *buffer, struct kernel_param *kp); -#define param_check_bool(name, p) __param_check(name, p, int) +#define param_check_bool(name, p) \ + static inline void __check_##name(void) \ + { \ + BUILD_BUG_ON(!__same_type(*(p), bool) && \ + !__same_type(*(p), unsigned int) && \ + !__same_type(*(p), int)); \ + } extern int param_set_invbool(const char *val, struct kernel_param *kp); extern int param_get_invbool(char *buffer, struct kernel_param *kp); @@ -203,8 +215,10 @@ extern int param_get_invbool(char *buffer, struct kernel_param *kp); static const struct kparam_array __param_arr_##name \ = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type,\ sizeof(array[0]), array }; \ - module_param_call(name, param_array_set, param_array_get, \ - .arr = &__param_arr_##name, perm); \ + __module_param_call(MODULE_PARAM_PREFIX, name, \ + param_array_set, param_array_get, \ + .arr = &__param_arr_##name, \ + __same_type(array[0], bool), perm); \ __MODULE_PARM_TYPE(name, "array of " #type) #define module_param_array(name, type, nump, perm) \ -- cgit v1.2.3 From ad6561dffa17f17bb68d7207d422c26c381c4313 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:47:03 -0600 Subject: module: trim exception table on init free. It's theoretically possible that there are exception table entries which point into the (freed) init text of modules. These could cause future problems if other modules get loaded into that memory and cause an exception as we'd see the wrong fixup. The only case I know of is kvm-intel.ko (when CONFIG_CC_OPTIMIZE_FOR_SIZE=n). Amerigo fixed this long-standing FIXME in the x86 version, but this patch is more general. This implements trim_init_extable(); most archs are simple since they use the standard lib/extable.c sort code. Alpha and IA64 use relative addresses in their fixups, so thier trimming is a slight variation. Sparc32 is unique; it doesn't seem to define ARCH_HAS_SORT_EXTABLE, yet it defines its own sort_extable() which overrides the one in lib. It doesn't sort, so we have to mark deleted entries instead of actually trimming them. Inspired-by: Amerigo Wang Signed-off-by: Rusty Russell Cc: linux-alpha@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-ia64@vger.kernel.org --- include/linux/module.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index a8f2c0aa4c32..a7bc6e7b43a7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -77,6 +77,7 @@ search_extable(const struct exception_table_entry *first, void sort_extable(struct exception_table_entry *start, struct exception_table_entry *finish); void sort_main_extable(void); +void trim_init_extable(struct module *m); #ifdef MODULE #define MODULE_GENERIC_TABLE(gtype,name) \ -- cgit v1.2.3 From f1a3c979059b2033d0b1cc4f9ee5c90bf92b5f94 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 17:56:09 +0200 Subject: perf_counter: PERF_TYPE_HW_CACHE is a hardware counter too is_software_counter() was missing the new HW_CACHE category. ( This could have caused some counter scheduling artifacts with mixed sw and hw counters and counter groups. ) Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6e133954e2e4..7c4f32f6ae1a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -621,7 +621,8 @@ extern int perf_counter_overflow(struct perf_counter *counter, int nmi, static inline int is_software_counter(struct perf_counter *counter) { return (counter->attr.type != PERF_TYPE_RAW) && - (counter->attr.type != PERF_TYPE_HARDWARE); + (counter->attr.type != PERF_TYPE_HARDWARE) && + (counter->attr.type != PERF_TYPE_HW_CACHE); } extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); -- cgit v1.2.3 From 974802eaa1afdc87e00821df7020a2b3c6fee623 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 12 Jun 2009 12:46:55 +0200 Subject: perf_counter: Add forward/backward attribute ABI compatibility Provide for means of extending the perf_counter_attr in a 'natural' way. We allow growing the structure by appending fields at the end by specifying the full structure size inside it. When a new kernel sees a smaller (old) structure, it will 0 pad the tail. When an old kernel sees a larger (new) structure, it will verify the tail consists of 0s, otherwise fail. If we fail due to a size-mismatch, we return -E2BIG and write the kernel's native attribe size back into the provided structure. Furthermore, add some attribute verification, so that we'll fail counter creation when unknown bits are present (PERF_SAMPLE, PERF_FORMAT, or in the __reserved fields). (This ABI detail is introduced while keeping the existing syscall ABI.) Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 19 +++++++++++++++---- include/linux/syscalls.h | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7c4f32f6ae1a..1b3118a1023a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -120,6 +120,8 @@ enum perf_counter_sample_format { PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, PERF_SAMPLE_PERIOD = 1U << 8, + + PERF_SAMPLE_MAX = 1U << 9, /* non-ABI */ }; /* @@ -131,17 +133,26 @@ enum perf_counter_read_format { PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, + + PERF_FORMAT_MAX = 1U << 3, /* non-ABI */ }; +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ + /* * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_attr { + /* * Major type: hardware/software/tracepoint/etc. */ __u32 type; - __u32 __reserved_1; + + /* + * Size of the attr structure, for fwd/bwd compat. + */ + __u32 size; /* * Type specific configuration information. @@ -168,12 +179,12 @@ struct perf_counter_attr { comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ - __reserved_2 : 53; + __reserved_1 : 53; __u32 wakeup_events; /* wakeup every n events */ - __u32 __reserved_3; + __u32 __reserved_2; - __u64 __reserved_4; + __u64 __reserved_3; }; /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c6c84ad8bd71..418d90f5effe 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage long sys_perf_counter_open( - const struct perf_counter_attr __user *attr_uptr, + struct perf_counter_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); #endif -- cgit v1.2.3 From 20f77f5654042cf484d8964b618faf9d620f639b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 22:16:33 -0600 Subject: virtio: fix obsolete documentation on probe function Signed-off-by: Rusty Russell --- include/linux/virtio.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 06005fa9e982..9410394bbf96 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -99,8 +99,7 @@ void unregister_virtio_device(struct virtio_device *dev); * @id_table: the ids serviced by this driver. * @feature_table: an array of feature numbers supported by this device. * @feature_table_size: number of entries in the feature table array. - * @probe: the function to call when a device is found. Returns a token for - * remove, or PTR_ERR(). + * @probe: the function to call when a device is found. Returns 0 or -errno. * @remove: the function when a device is removed. * @config_changed: optional function to call when the device configuration * changes; may be called in interrupt context. -- cgit v1.2.3 From 9499f5e7ed5224c40706f0cec6542a9916bc7606 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 22:16:35 -0600 Subject: virtio: add names to virtqueue struct, mapping from devices to queues. Add a linked list of all virtqueues for a virtio device: this helps for debugging and is also needed for upcoming interface change. Also, add a "name" field for clearer debug messages. Signed-off-by: Rusty Russell --- include/linux/virtio.h | 12 ++++++++---- include/linux/virtio_config.h | 6 ++++-- include/linux/virtio_ring.h | 3 ++- 3 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 9410394bbf96..4fca4f5440ba 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -10,14 +10,17 @@ /** * virtqueue - a queue to register buffers for sending or receiving. + * @list: the chain of virtqueues for this device * @callback: the function to call when buffers are consumed (can be NULL). + * @name: the name of this virtqueue (mainly for debugging) * @vdev: the virtio device this queue was created for. * @vq_ops: the operations for this virtqueue (see below). * @priv: a pointer for the virtqueue implementation to use. */ -struct virtqueue -{ +struct virtqueue { + struct list_head list; void (*callback)(struct virtqueue *vq); + const char *name; struct virtio_device *vdev; struct virtqueue_ops *vq_ops; void *priv; @@ -76,15 +79,16 @@ struct virtqueue_ops { * @dev: underlying device. * @id: the device type identification (used to match it with a driver). * @config: the configuration ops for this device. + * @vqs: the list of virtqueues for this device. * @features: the features supported by both driver and device. * @priv: private pointer for the driver's use. */ -struct virtio_device -{ +struct virtio_device { int index; struct device dev; struct virtio_device_id id; struct virtio_config_ops *config; + struct list_head vqs; /* Note that this is a Linux set_bit-style bitmap. */ unsigned long features[1]; void *priv; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index bf8ec283b232..9fae274751e0 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -55,7 +55,8 @@ * @find_vq: find a virtqueue and instantiate it. * vdev: the virtio_device * index: the 0-based virtqueue number in case there's more than one. - * callback: the virqtueue callback + * callback: the virtqueue callback + * name: the virtqueue name (mainly for debugging) * Returns the new virtqueue or ERR_PTR() (eg. -ENOENT). * @del_vq: free a virtqueue found by find_vq(). * @get_features: get the array of feature bits for this device. @@ -77,7 +78,8 @@ struct virtio_config_ops void (*reset)(struct virtio_device *vdev); struct virtqueue *(*find_vq)(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *)); + void (*callback)(struct virtqueue *), + const char *name); void (*del_vq)(struct virtqueue *vq); u32 (*get_features)(struct virtio_device *vdev); void (*finalize_features)(struct virtio_device *vdev); diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 71e03722fb59..166c519689de 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -119,7 +119,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, struct virtio_device *vdev, void *pages, void (*notify)(struct virtqueue *vq), - void (*callback)(struct virtqueue *vq)); + void (*callback)(struct virtqueue *vq), + const char *name); void vring_del_virtqueue(struct virtqueue *vq); /* Filter out transport-specific feature bits. */ void vring_transport_features(struct virtio_device *vdev); -- cgit v1.2.3 From d2a7ddda9ffb1c8961abff6714b0f1eb925c120f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 12 Jun 2009 22:16:36 -0600 Subject: virtio: find_vqs/del_vqs virtio operations This replaces find_vq/del_vq with find_vqs/del_vqs virtio operations, and updates all drivers. This is needed for MSI support, because MSI needs to know the total number of vectors upfront. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell (+ lguest/9p compile fixes) --- include/linux/virtio_config.h | 47 +++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 9fae274751e0..4cd290c06a88 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -29,6 +29,7 @@ #define VIRTIO_F_NOTIFY_ON_EMPTY 24 #ifdef __KERNEL__ +#include #include /** @@ -49,16 +50,26 @@ * @set_status: write the status byte * vdev: the virtio_device * status: the new status byte + * @request_vqs: request the specified number of virtqueues + * vdev: the virtio_device + * max_vqs: the max number of virtqueues we want + * If supplied, must call before any virtqueues are instantiated. + * To modify the max number of virtqueues after request_vqs has been + * called, call free_vqs and then request_vqs with a new value. + * @free_vqs: cleanup resources allocated by request_vqs + * vdev: the virtio_device + * If supplied, must call after all virtqueues have been deleted. * @reset: reset the device * vdev: the virtio device * After this, status and feature negotiation must be done again - * @find_vq: find a virtqueue and instantiate it. + * @find_vqs: find virtqueues and instantiate them. * vdev: the virtio_device - * index: the 0-based virtqueue number in case there's more than one. - * callback: the virtqueue callback - * name: the virtqueue name (mainly for debugging) - * Returns the new virtqueue or ERR_PTR() (eg. -ENOENT). - * @del_vq: free a virtqueue found by find_vq(). + * nvqs: the number of virtqueues to find + * vqs: on success, includes new virtqueues + * callbacks: array of callbacks, for each virtqueue + * names: array of virtqueue names (mainly for debugging) + * Returns 0 on success or error status + * @del_vqs: free virtqueues found by find_vqs(). * @get_features: get the array of feature bits for this device. * vdev: the virtio_device * Returns the first 32 feature bits (all we currently need). @@ -67,6 +78,7 @@ * This gives the final feature bits for the device: it can change * the dev->feature bits if it wants. */ +typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, @@ -76,11 +88,11 @@ struct virtio_config_ops u8 (*get_status)(struct virtio_device *vdev); void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); - struct virtqueue *(*find_vq)(struct virtio_device *vdev, - unsigned index, - void (*callback)(struct virtqueue *), - const char *name); - void (*del_vq)(struct virtqueue *vq); + int (*find_vqs)(struct virtio_device *, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]); + void (*del_vqs)(struct virtio_device *); u32 (*get_features)(struct virtio_device *vdev); void (*finalize_features)(struct virtio_device *vdev); }; @@ -128,5 +140,18 @@ static inline int virtio_config_buf(struct virtio_device *vdev, vdev->config->get(vdev, offset, buf, len); return 0; } + +static inline +struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, + vq_callback_t *c, const char *n) +{ + vq_callback_t *callbacks[] = { c }; + const char *names[] = { n }; + struct virtqueue *vq; + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names); + if (err < 0) + return ERR_PTR(err); + return vq; +} #endif /* __KERNEL__ */ #endif /* _LINUX_VIRTIO_CONFIG_H */ -- cgit v1.2.3 From 82af8ce84ed65d2fb6d8c017d3f2bbbf161061fb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 14 May 2009 13:55:41 +0300 Subject: virtio_pci: optional MSI-X support This implements optional MSI-X support in virtio_pci. MSI-X is used whenever the host supports at least 2 MSI-X vectors: 1 for configuration changes and 1 for virtqueues. Per-virtqueue vectors are allocated if enough vectors available. Signed-off-by: Michael S. Tsirkin Acked-by: Anthony Liguori Signed-off-by: Rusty Russell (+ whitespace, style) --- include/linux/virtio_pci.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index cd0fd5d181a6..9a3d7c48c622 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -47,9 +47,17 @@ /* The bit of the ISR which indicates a device configuration change. */ #define VIRTIO_PCI_ISR_CONFIG 0x2 +/* MSI-X registers: only enabled if MSI-X is enabled. */ +/* A 16-bit vector for configuration changes. */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 +/* A 16-bit vector for selected queue notifications. */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 +/* Vector value used to disable MSI for queue */ +#define VIRTIO_MSI_NO_VECTOR 0xffff + /* The remaining space is defined by each driver as the per-driver * configuration space */ -#define VIRTIO_PCI_CONFIG 20 +#define VIRTIO_PCI_CONFIG(dev) ((dev)->msix_enabled ? 24 : 20) /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 -- cgit v1.2.3 From ee006b353f1ca8c9a8470b72b462beb011d62e32 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Mon, 11 May 2009 18:11:44 +0100 Subject: virtio: teach virtio_has_feature() about transport features Drivers don't add transport features to their table, so we shouldn't check these with virtio_check_driver_offered_feature(). We could perhaps add an ->offered_feature() virtio_config_op, but that perhaps that would be overkill for a consitency check like this. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell --- include/linux/virtio_config.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 4cd290c06a88..99f514575f6a 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -113,7 +113,9 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, if (__builtin_constant_p(fbit)) BUILD_BUG_ON(fbit >= 32); - virtio_check_driver_offered_feature(vdev, fbit); + if (fbit < VIRTIO_TRANSPORT_F_START) + virtio_check_driver_offered_feature(vdev, fbit); + return test_bit(fbit, vdev->features); } -- cgit v1.2.3 From 9fa29b9df32ba4db055f3977933cd0c1b8fe67cd Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Mon, 11 May 2009 18:11:45 +0100 Subject: virtio: indirect ring entries (VIRTIO_RING_F_INDIRECT_DESC) Add a new feature flag for indirect ring entries. These are ring entries which point to a table of buffer descriptors. The idea here is to increase the ring capacity by allowing a larger effective ring size whereby the ring size dictates the number of requests that may be outstanding, rather than the size of those requests. This should be most effective in the case of block I/O where we can potentially benefit by concurrently dispatching a large number of large requests. Even in the simple case of single segment block requests, this results in a threefold increase in ring capacity. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell --- include/linux/virtio_ring.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 166c519689de..693e0ec5afa6 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -14,6 +14,8 @@ #define VRING_DESC_F_NEXT 1 /* This marks a buffer as write-only (otherwise read-only). */ #define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 /* The Host uses this in used->flags to advise the Guest: don't kick me when * you add a buffer. It's unreliable, so it's simply an optimization. Guest @@ -24,6 +26,9 @@ * optimization. */ #define VRING_AVAIL_F_NO_INTERRUPT 1 +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { -- cgit v1.2.3 From a32a8813d0173163ba44d8f9556e0d89fdc4fb46 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 22:27:02 -0600 Subject: lguest: improve interrupt handling, speed up stream networking lguest never checked for pending interrupts when enabling interrupts, and things still worked. However, it makes a significant difference to TCP performance, so it's time we fixed it by introducing a pending_irq flag and checking it on irq_restore and irq_enable. These two routines are now too big to patch into the 8/10 bytes patch space, so we drop that code. Note: The high latency on interrupt delivery had a very curious effect: once everything else was optimized, networking without GSO was faster than networking with GSO, since more interrupts were sent and hence a greater chance of one getting through to the Guest! Note2: (Almost) Closing the same loophole for iret doesn't have any measurable effect, so I'm leaving that patch for the moment. Before: 1GB tcpblast Guest->Host: 30.7 seconds 1GB tcpblast Guest->Host (no GSO): 76.0 seconds After: 1GB tcpblast Guest->Host: 6.8 seconds 1GB tcpblast Guest->Host (no GSO): 27.8 seconds Signed-off-by: Rusty Russell --- include/linux/lguest.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/lguest.h b/include/linux/lguest.h index 175e63f4a8c0..7bc1440fc473 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -30,6 +30,10 @@ struct lguest_data /* Wallclock time set by the Host. */ struct timespec time; + /* Interrupt pending set by the Host. The Guest should do a hypercall + * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). */ + int irq_pending; + /* Async hypercall ring. Instead of directly making hypercalls, we can * place them in here for processing the next time the Host wants. * This batching can be quite efficient. */ -- cgit v1.2.3 From df60aeef4f4fe0645d9a195a7689005520422de5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 22:27:09 -0600 Subject: lguest: use eventfds for device notification Currently, when a Guest wants to perform I/O it calls LHCALL_NOTIFY with an address: the main Launcher process returns with this address, and figures out what device to run. A far nicer model is to let processes bind an eventfd to an address: if we find one, we simply signal the eventfd. Signed-off-by: Rusty Russell Cc: Davide Libenzi --- include/linux/lguest_launcher.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index a53407a4165c..9de964b90586 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -58,6 +58,7 @@ enum lguest_req LHREQ_GETDMA, /* No longer used */ LHREQ_IRQ, /* + irq */ LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ + LHREQ_EVENTFD, /* + address, fd. */ }; /* The alignment to use between consumer and producer parts of vring. -- cgit v1.2.3 From 5dac051bc6030963181b69faddd9e0ad04f85fa8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 22:27:10 -0600 Subject: lguest: remove obsolete LHREQ_BREAK call We no longer need an efficient mechanism to force the Guest back into host userspace, as each device is serviced without bothering the main Guest process (aka. the Launcher). Signed-off-by: Rusty Russell --- include/linux/lguest_launcher.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index 9de964b90586..bfefbdf7498a 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -57,7 +57,7 @@ enum lguest_req LHREQ_INITIALIZE, /* + base, pfnlimit, start */ LHREQ_GETDMA, /* No longer used */ LHREQ_IRQ, /* + irq */ - LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ + LHREQ_BREAK, /* No longer used */ LHREQ_EVENTFD, /* + address, fd. */ }; -- cgit v1.2.3 From 7e85ee0c1d15ca5f8bff0f514f158eba1742dd87 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 12 Jun 2009 14:03:06 +0300 Subject: slab,slub: don't enable interrupts during early boot As explained by Benjamin Herrenschmidt: Oh and btw, your patch alone doesn't fix powerpc, because it's missing a whole bunch of GFP_KERNEL's in the arch code... You would have to grep the entire kernel for things that check slab_is_available() and even then you'll be missing some. For example, slab_is_available() didn't always exist, and so in the early days on powerpc, we used a mem_init_done global that is set form mem_init() (not perfect but works in practice). And we still have code using that to do the test. Therefore, mask out __GFP_WAIT, __GFP_IO, and __GFP_FS in the slab allocators in early boot code to avoid enabling interrupts. Signed-off-by: Pekka Enberg --- include/linux/gfp.h | 3 +++ include/linux/slab.h | 2 ++ include/linux/slob_def.h | 5 +++++ include/linux/slub_def.h | 2 ++ 4 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0bbc15f54536..3760e7c5de02 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -85,6 +85,9 @@ struct vm_area_struct; __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_NOMEMALLOC) +/* Control slab gfp mask during early boot */ +#define SLAB_GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS) + /* Control allocation constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) diff --git a/include/linux/slab.h b/include/linux/slab.h index 48803064cedf..219b8fb4651d 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -319,4 +319,6 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node) return kmalloc_node(size, flags | __GFP_ZERO, node); } +void __init kmem_cache_init_late(void); + #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index 0ec00b39d006..bb5368df4be8 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -34,4 +34,9 @@ static __always_inline void *__kmalloc(size_t size, gfp_t flags) return kmalloc(size, flags); } +static inline void kmem_cache_init_late(void) +{ + /* Nothing to do */ +} + #endif /* __LINUX_SLOB_DEF_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index be5d40c43bd2..4dcbc2c71491 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -302,4 +302,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) } #endif +void __init kmem_cache_init_late(void); + #endif /* _LINUX_SLUB_DEF_H */ -- cgit v1.2.3 From 7ea2ac9b6632038377cb488c7d1cb60b88164d4d Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 14 Apr 2009 23:14:17 -0300 Subject: Trivial: fix typo s/balence/balance/ Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Jiri Kosina --- include/linux/ultrasound.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ultrasound.h b/include/linux/ultrasound.h index 6b7703e75cec..71339dc531c5 100644 --- a/include/linux/ultrasound.h +++ b/include/linux/ultrasound.h @@ -34,7 +34,7 @@ * _GUS_VOICEOFF - Stops voice (no parameters) * _GUS_VOICEFADE - Stops the voice smoothly. * _GUS_VOICEMODE - Alters the voice mode, don't start or stop voice (P1=voice mode) - * _GUS_VOICEBALA - Sets voice balence (P1, 0=left, 7=middle and 15=right, default 7) + * _GUS_VOICEBALA - Sets voice balance (P1, 0=left, 7=middle and 15=right, default 7) * _GUS_VOICEFREQ - Sets voice (sample) playback frequency (P1=Hz) * _GUS_VOICEVOL - Sets voice volume (P1=volume, 0xfff=max, 0xeff=half, 0x000=off) * _GUS_VOICEVOL2 - Sets voice volume (P1=volume, 0xfff=max, 0xeff=half, 0x000=off) -- cgit v1.2.3 From 638772c7553f6893f7b346bfee4d46851af59afc Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 11 Feb 2009 17:25:24 +0800 Subject: fb: add support of LCD display controller on pxa168/910 (base layer) This driver is originally written by Lennert, modified by Green to be feature complete, and ported by Jun Nie and Kevin Liu for pxa168/910 processors. The patch adds support for the on-chip LCD display controller, it currently supports the base (graphics) layer only. Signed-off-by: Lennert Buytenhek Signed-off-by: Green Wan Cc: Peter Liao Signed-off-by: Jun Nie Signed-off-by: Kevin Liu Acked-by: Krzysztof Helt Signed-off-by: Eric Miao --- include/video/pxa168fb.h | 127 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 include/video/pxa168fb.h (limited to 'include') diff --git a/include/video/pxa168fb.h b/include/video/pxa168fb.h new file mode 100644 index 000000000000..b5cc72fe0461 --- /dev/null +++ b/include/video/pxa168fb.h @@ -0,0 +1,127 @@ +/* + * linux/arch/arm/mach-mmp/include/mach/pxa168fb.h + * + * Copyright (C) 2009 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_MACH_PXA168FB_H +#define __ASM_MACH_PXA168FB_H + +#include +#include + +/* Dumb interface */ +#define PIN_MODE_DUMB_24 0 +#define PIN_MODE_DUMB_18_SPI 1 +#define PIN_MODE_DUMB_18_GPIO 2 +#define PIN_MODE_DUMB_16_SPI 3 +#define PIN_MODE_DUMB_16_GPIO 4 +#define PIN_MODE_DUMB_12_SPI_GPIO 5 +#define PIN_MODE_SMART_18_SPI 6 +#define PIN_MODE_SMART_16_SPI 7 +#define PIN_MODE_SMART_8_SPI_GPIO 8 + +/* Dumb interface pin allocation */ +#define DUMB_MODE_RGB565 0 +#define DUMB_MODE_RGB565_UPPER 1 +#define DUMB_MODE_RGB666 2 +#define DUMB_MODE_RGB666_UPPER 3 +#define DUMB_MODE_RGB444 4 +#define DUMB_MODE_RGB444_UPPER 5 +#define DUMB_MODE_RGB888 6 + +/* default fb buffer size WVGA-32bits */ +#define DEFAULT_FB_SIZE (800 * 480 * 4) + +/* + * Buffer pixel format + * bit0 is for rb swap. + * bit12 is for Y UorV swap + */ +#define PIX_FMT_RGB565 0 +#define PIX_FMT_BGR565 1 +#define PIX_FMT_RGB1555 2 +#define PIX_FMT_BGR1555 3 +#define PIX_FMT_RGB888PACK 4 +#define PIX_FMT_BGR888PACK 5 +#define PIX_FMT_RGB888UNPACK 6 +#define PIX_FMT_BGR888UNPACK 7 +#define PIX_FMT_RGBA888 8 +#define PIX_FMT_BGRA888 9 +#define PIX_FMT_YUV422PACK 10 +#define PIX_FMT_YVU422PACK 11 +#define PIX_FMT_YUV422PLANAR 12 +#define PIX_FMT_YVU422PLANAR 13 +#define PIX_FMT_YUV420PLANAR 14 +#define PIX_FMT_YVU420PLANAR 15 +#define PIX_FMT_PSEUDOCOLOR 20 +#define PIX_FMT_UYVY422PACK (0x1000|PIX_FMT_YUV422PACK) + +/* + * PXA LCD controller private state. + */ +struct pxa168fb_info { + struct device *dev; + struct clk *clk; + struct fb_info *info; + + void __iomem *reg_base; + dma_addr_t fb_start_dma; + u32 pseudo_palette[16]; + + int pix_fmt; + unsigned is_blanked:1; + unsigned panel_rbswap:1; + unsigned active:1; +}; + +/* + * PXA fb machine information + */ +struct pxa168fb_mach_info { + char id[16]; + + int num_modes; + struct fb_videomode *modes; + + /* + * Pix_fmt + */ + unsigned pix_fmt; + + /* + * I/O pin allocation. + */ + unsigned io_pin_allocation_mode:4; + + /* + * Dumb panel -- assignment of R/G/B component info to the 24 + * available external data lanes. + */ + unsigned dumb_mode:4; + unsigned panel_rgb_reverse_lanes:1; + + /* + * Dumb panel -- GPIO output data. + */ + unsigned gpio_output_mask:8; + unsigned gpio_output_data:8; + + /* + * Dumb panel -- configurable output signal polarity. + */ + unsigned invert_composite_blank:1; + unsigned invert_pix_val_ena:1; + unsigned invert_pixclock:1; + unsigned invert_vsync:1; + unsigned invert_hsync:1; + unsigned panel_rbswap:1; + unsigned active:1; + unsigned enable_lcd:1; +}; + +#endif /* __ASM_MACH_PXA168FB_H */ -- cgit v1.2.3 From e39a71ef80877f4e30d808af9acceec80f4d2f7c Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 15 May 2009 00:53:26 +0200 Subject: PM: Rename device_power_down/up() Rename the functions performing "_noirq" dev_pm_ops operations from device_power_down() and device_power_up() to device_suspend_noirq() and device_resume_noirq(). The new function names are chosen to show that the functions are responsible for calling the _noirq() versions to finalize the suspend/resume operation. The current function names do not perform power down/up anymore so the names may be misleading. Global function renames: - device_power_down() -> device_suspend_noirq() - device_power_up() -> device_resume_noirq() Static function renames: - suspend_device_noirq() -> __device_suspend_noirq() - resume_device_noirq() -> __device_resume_noirq() Signed-off-by: Magnus Damm Acked-by: Greg Kroah-Hartman Acked-by: Len Brown Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index 1d4e2d289821..2170252074f3 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -382,12 +382,12 @@ struct dev_pm_info { #ifdef CONFIG_PM_SLEEP extern void device_pm_lock(void); extern int sysdev_resume(void); -extern void device_power_up(pm_message_t state); +extern void device_resume_noirq(pm_message_t state); extern void device_resume(pm_message_t state); extern void device_pm_unlock(void); extern int sysdev_suspend(pm_message_t state); -extern int device_power_down(pm_message_t state); +extern int device_suspend_noirq(pm_message_t state); extern int device_suspend(pm_message_t state); extern int device_prepare_suspend(pm_message_t state); -- cgit v1.2.3 From d161630297a20802d01c55847bfcba85d2118a9f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sun, 24 May 2009 22:05:42 +0200 Subject: PM core: rename suspend and resume functions This patch (as1241) renames a bunch of functions in the PM core. Rather than go through a boring list of name changes, suffice it to say that in the end we have a bunch of pairs of functions: device_resume_noirq dpm_resume_noirq device_resume dpm_resume device_complete dpm_complete device_suspend_noirq dpm_suspend_noirq device_suspend dpm_suspend device_prepare dpm_prepare in which device_X does the X operation on a single device and dpm_X invokes device_X for all devices in the dpm_list. In addition, the old dpm_power_up and device_resume_noirq have been combined into a single function (dpm_resume_noirq). Lastly, dpm_suspend_start and dpm_resume_end are the renamed versions of the former top-level device_suspend and device_resume routines. Signed-off-by: Alan Stern Acked-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index 2170252074f3..b3f74764a586 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -382,14 +382,13 @@ struct dev_pm_info { #ifdef CONFIG_PM_SLEEP extern void device_pm_lock(void); extern int sysdev_resume(void); -extern void device_resume_noirq(pm_message_t state); -extern void device_resume(pm_message_t state); +extern void dpm_resume_noirq(pm_message_t state); +extern void dpm_resume_end(pm_message_t state); extern void device_pm_unlock(void); extern int sysdev_suspend(pm_message_t state); -extern int device_suspend_noirq(pm_message_t state); -extern int device_suspend(pm_message_t state); -extern int device_prepare_suspend(pm_message_t state); +extern int dpm_suspend_noirq(pm_message_t state); +extern int dpm_suspend_start(pm_message_t state); extern void __suspend_report_result(const char *function, void *fn, int ret); @@ -403,7 +402,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); #define device_pm_lock() do {} while (0) #define device_pm_unlock() do {} while (0) -static inline int device_suspend(pm_message_t state) +static inline int dpm_suspend_start(pm_message_t state) { return 0; } -- cgit v1.2.3 From e240b58c79144708530138e05f17c6d0d8d744a8 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Sun, 24 May 2009 22:05:54 +0200 Subject: PM: Remove bus_type suspend_late()/resume_early() V2 Remove the ->suspend_late() and ->resume_early() callbacks from struct bus_type V2. These callbacks are legacy stuff at this point and since there seem to be no in-tree users we may as well remove them. New users should use dev_pm_ops. Signed-off-by: Magnus Damm Acked-by: Pavel Machek Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 5d5c197bad45..84d79cde9f7d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -62,8 +62,6 @@ struct bus_type { void (*shutdown)(struct device *dev); int (*suspend)(struct device *dev, pm_message_t state); - int (*suspend_late)(struct device *dev, pm_message_t state); - int (*resume_early)(struct device *dev); int (*resume)(struct device *dev); struct dev_pm_ops *pm; -- cgit v1.2.3 From 00725787511e20dbd1fdc1fb233606120ae5c8cf Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 4 Jun 2009 22:13:25 +0200 Subject: PM: Remove device_type suspend()/resume() This patch removes the legacy callbacks ->suspend() and ->resume() from struct device_type. These callbacks seem unused, and new code should instead make use of struct dev_pm_ops. Signed-off-by: Magnus Damm Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- include/linux/device.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 84d79cde9f7d..a4a7b10aaa48 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -289,9 +289,6 @@ struct device_type { int (*uevent)(struct device *dev, struct kobj_uevent_env *env); void (*release)(struct device *dev); - int (*suspend)(struct device *dev, pm_message_t state); - int (*resume)(struct device *dev); - struct dev_pm_ops *pm; }; -- cgit v1.2.3 From fce2b111fae9151a53dabb36513b398d03337a19 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 10 Jun 2009 01:28:19 +0200 Subject: PM/Hibernate: Move NVS routines into a seperate file (v2). The *_nvs_* routines in swsusp.c make use of the io*map() functions, which are only provided for HAS_IOMEM, thus breaking compilation if HAS_IOMEM is not set. Fix this by moving the *_nvs_* routines into hibernate_nvs.c, which is only compiled if HAS_IOMEM is set. [rjw: Change the name of the new file to hibernate_nvs.c, add the license line to the header comment.] Signed-off-by: Cornelia Huck Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 795032edfc46..cd15df6c63cd 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -245,11 +245,6 @@ extern unsigned long get_safe_page(gfp_t gfp_mask); extern void hibernation_set_ops(struct platform_hibernation_ops *ops); extern int hibernate(void); -extern int hibernate_nvs_register(unsigned long start, unsigned long size); -extern int hibernate_nvs_alloc(void); -extern void hibernate_nvs_free(void); -extern void hibernate_nvs_save(void); -extern void hibernate_nvs_restore(void); extern bool system_entering_hibernation(void); #else /* CONFIG_HIBERNATION */ static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } @@ -258,6 +253,16 @@ static inline void swsusp_unset_page_free(struct page *p) {} static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {} static inline int hibernate(void) { return -ENOSYS; } +static inline bool system_entering_hibernation(void) { return false; } +#endif /* CONFIG_HIBERNATION */ + +#ifdef CONFIG_HIBERNATION_NVS +extern int hibernate_nvs_register(unsigned long start, unsigned long size); +extern int hibernate_nvs_alloc(void); +extern void hibernate_nvs_free(void); +extern void hibernate_nvs_save(void); +extern void hibernate_nvs_restore(void); +#else /* CONFIG_HIBERNATION_NVS */ static inline int hibernate_nvs_register(unsigned long a, unsigned long b) { return 0; @@ -266,8 +271,7 @@ static inline int hibernate_nvs_alloc(void) { return 0; } static inline void hibernate_nvs_free(void) {} static inline void hibernate_nvs_save(void) {} static inline void hibernate_nvs_restore(void) {} -static inline bool system_entering_hibernation(void) { return false; } -#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_HIBERNATION_NVS */ #ifdef CONFIG_PM_SLEEP void save_processor_state(void); -- cgit v1.2.3 From 5818a6e2519b34cd6d0220d89f5729ab2725e1bf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 11 Jun 2009 21:59:21 +0200 Subject: PM: Add empty suspend/resume device irq functions git commit 0a0c5168 "PM: Introduce functions for suspending and resuming device interrupts" introduced some helper functions. However these functions are only available for architectures which support GENERIC_HARDIRQS. Other architectures will see this build error: drivers/built-in.o: In function `sysdev_suspend': (.text+0x15138): undefined reference to `check_wakeup_irqs' drivers/built-in.o: In function `device_power_up': (.text+0x1cb66): undefined reference to `resume_device_irqs' drivers/built-in.o: In function `device_power_down': (.text+0x1cb92): undefined reference to `suspend_device_irqs' To fix this add some empty inline functions for !GENERIC_HARDIRQS. Signed-off-by: Heiko Carstens Signed-off-by: Rafael J. Wysocki --- include/linux/interrupt.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index ff374ceface0..c41e812e9d5e 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -183,6 +183,7 @@ extern void disable_irq(unsigned int irq); extern void enable_irq(unsigned int irq); /* The following three functions are for the core kernel use only. */ +#ifdef CONFIG_GENERIC_HARDIRQS extern void suspend_device_irqs(void); extern void resume_device_irqs(void); #ifdef CONFIG_PM_SLEEP @@ -190,6 +191,11 @@ extern int check_wakeup_irqs(void); #else static inline int check_wakeup_irqs(void) { return 0; } #endif +#else +static inline void suspend_device_irqs(void) { }; +static inline void resume_device_irqs(void) { }; +static inline int check_wakeup_irqs(void) { return 0; } +#endif #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) -- cgit v1.2.3 From dd14be4c274fc484eccace03ae9726e516630331 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20R=C3=B6jfors?= Date: Fri, 5 Jun 2009 15:40:32 +0200 Subject: i2c-ocores: Can add I2C devices to the bus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is sometimes a need for the ocores driver to add devices to the bus when installed. i2c_register_board_info can not always be used, because the I2C devices are not known at an early state, they could for instance be connected on a I2C bus on a PCI device which has the Open Cores IP. i2c_new_device can not be used in all cases either since the resulting bus nummer might be unknown. The solution is the pass a list of I2C devices in the platform data to the Open Cores driver. This is useful for MFD drivers. Signed-off-by: Richard Röjfors Signed-off-by: Ben Dooks --- include/linux/i2c-ocores.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/i2c-ocores.h b/include/linux/i2c-ocores.h index 8ed591b0887e..4d5e57ff6614 100644 --- a/include/linux/i2c-ocores.h +++ b/include/linux/i2c-ocores.h @@ -14,6 +14,8 @@ struct ocores_i2c_platform_data { u32 regstep; /* distance between registers */ u32 clock_khz; /* input clock in kHz */ + u8 num_devices; /* number of devices in the devices list */ + struct i2c_board_info const *devices; /* devices connected to the bus */ }; #endif /* _LINUX_I2C_OCORES_H */ -- cgit v1.2.3 From 5f5bac8272be791b67c7b7b411e7c8c5847e598a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 22 May 2009 20:33:59 +0200 Subject: mmc: Driver for CB710/720 memory card reader (MMC part) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code is divided in two parts. There is a virtual 'bus' driver that handles PCI device and registers three new devices one per card reader type. The other driver handles SD/MMC part of the reader. Signed-off-by: Michał Mirosław Signed-off-by: Pierre Ossman --- include/linux/cb710.h | 238 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 1 + 2 files changed, 239 insertions(+) create mode 100644 include/linux/cb710.h (limited to 'include') diff --git a/include/linux/cb710.h b/include/linux/cb710.h new file mode 100644 index 000000000000..c3819e1ce1c6 --- /dev/null +++ b/include/linux/cb710.h @@ -0,0 +1,238 @@ +/* + * cb710/cb710.h + * + * Copyright by Michał Mirosław, 2008-2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef LINUX_CB710_DRIVER_H +#define LINUX_CB710_DRIVER_H + +#ifdef CONFIG_CB710_DEBUG +#define DEBUG +#endif + +/* verify assumptions on platform_device framework */ +#define CONFIG_CB710_DEBUG_ASSUMPTIONS + +#include +#include +#include +#include +#include +#include + +struct cb710_slot; + +typedef int (*cb710_irq_handler_t)(struct cb710_slot *); + +/* per-virtual-slot structure */ +struct cb710_slot { + struct platform_device pdev; + void __iomem *iobase; + cb710_irq_handler_t irq_handler; +}; + +/* per-device structure */ +struct cb710_chip { + struct pci_dev *pdev; + void __iomem *iobase; + unsigned platform_id; +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + atomic_t slot_refs_count; +#endif + unsigned slot_mask; + unsigned slots; + spinlock_t irq_lock; + struct cb710_slot slot[0]; +}; + +/* NOTE: cb710_chip.slots is modified only during device init/exit and + * they are all serialized wrt themselves */ + +/* cb710_chip.slot_mask values */ +#define CB710_SLOT_MMC 1 +#define CB710_SLOT_MS 2 +#define CB710_SLOT_SM 4 + +/* slot port accessors - so the logic is more clear in the code */ +#define CB710_PORT_ACCESSORS(t) \ +static inline void cb710_write_port_##t(struct cb710_slot *slot, \ + unsigned port, u##t value) \ +{ \ + iowrite##t(value, slot->iobase + port); \ +} \ + \ +static inline u##t cb710_read_port_##t(struct cb710_slot *slot, \ + unsigned port) \ +{ \ + return ioread##t(slot->iobase + port); \ +} \ + \ +static inline void cb710_modify_port_##t(struct cb710_slot *slot, \ + unsigned port, u##t set, u##t clear) \ +{ \ + iowrite##t( \ + (ioread##t(slot->iobase + port) & ~clear)|set, \ + slot->iobase + port); \ +} + +CB710_PORT_ACCESSORS(8) +CB710_PORT_ACCESSORS(16) +CB710_PORT_ACCESSORS(32) + +void cb710_pci_update_config_reg(struct pci_dev *pdev, + int reg, uint32_t and, uint32_t xor); +void cb710_set_irq_handler(struct cb710_slot *slot, + cb710_irq_handler_t handler); + +/* some device struct walking */ + +static inline struct cb710_slot *cb710_pdev_to_slot( + struct platform_device *pdev) +{ + return container_of(pdev, struct cb710_slot, pdev); +} + +static inline struct cb710_chip *cb710_slot_to_chip(struct cb710_slot *slot) +{ + return dev_get_drvdata(slot->pdev.dev.parent); +} + +static inline struct device *cb710_slot_dev(struct cb710_slot *slot) +{ + return &slot->pdev.dev; +} + +static inline struct device *cb710_chip_dev(struct cb710_chip *chip) +{ + return &chip->pdev->dev; +} + +/* debugging aids */ + +#ifdef CONFIG_CB710_DEBUG +void cb710_dump_regs(struct cb710_chip *chip, unsigned dump); +#else +#define cb710_dump_regs(c, d) do {} while (0) +#endif + +#define CB710_DUMP_REGS_MMC 0x0F +#define CB710_DUMP_REGS_MS 0x30 +#define CB710_DUMP_REGS_SM 0xC0 +#define CB710_DUMP_REGS_ALL 0xFF +#define CB710_DUMP_REGS_MASK 0xFF + +#define CB710_DUMP_ACCESS_8 0x100 +#define CB710_DUMP_ACCESS_16 0x200 +#define CB710_DUMP_ACCESS_32 0x400 +#define CB710_DUMP_ACCESS_ALL 0x700 +#define CB710_DUMP_ACCESS_MASK 0x700 + +#endif /* LINUX_CB710_DRIVER_H */ +/* + * cb710/sgbuf2.h + * + * Copyright by Michał Mirosław, 2008-2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef LINUX_CB710_SG_H +#define LINUX_CB710_SG_H + +#include +#include + +/** + * cb710_sg_miter_stop_writing - stop mapping iteration after writing + * @miter: sg mapping iter to be stopped + * + * Description: + * Stops mapping iterator @miter. @miter should have been started + * started using sg_miter_start(). A stopped iteration can be + * resumed by calling sg_miter_next() on it. This is useful when + * resources (kmap) need to be released during iteration. + * + * This is a convenience wrapper that will be optimized out for arches + * that don't need flush_kernel_dcache_page(). + * + * Context: + * IRQ disabled if the SG_MITER_ATOMIC is set. Don't care otherwise. + */ +static inline void cb710_sg_miter_stop_writing(struct sg_mapping_iter *miter) +{ + if (miter->page) + flush_kernel_dcache_page(miter->page); + sg_miter_stop(miter); +} + +/* + * 32-bit PIO mapping sg iterator + * + * Hides scatterlist access issues - fragment boundaries, alignment, page + * mapping - for drivers using 32-bit-word-at-a-time-PIO (ie. PCI devices + * without DMA support). + * + * Best-case reading (transfer from device): + * sg_miter_start(); + * cb710_sg_dwiter_write_from_io(); + * cb710_sg_miter_stop_writing(); + * + * Best-case writing (transfer to device): + * sg_miter_start(); + * cb710_sg_dwiter_read_to_io(); + * sg_miter_stop(); + */ + +uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter); +void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data); + +/** + * cb710_sg_dwiter_write_from_io - transfer data to mapped buffer from 32-bit IO port + * @miter: sg mapping iter + * @port: PIO port - IO or MMIO address + * @count: number of 32-bit words to transfer + * + * Description: + * Reads @count 32-bit words from register @port and stores it in + * buffer iterated by @miter. Data that would overflow the buffer + * is silently ignored. Iterator is advanced by 4*@count bytes + * or to the buffer's end whichever is closer. + * + * Context: + * IRQ disabled if the SG_MITER_ATOMIC is set. Don't care otherwise. + */ +static inline void cb710_sg_dwiter_write_from_io(struct sg_mapping_iter *miter, + void __iomem *port, size_t count) +{ + while (count-- > 0) + cb710_sg_dwiter_write_next_block(miter, ioread32(port)); +} + +/** + * cb710_sg_dwiter_read_to_io - transfer data to 32-bit IO port from mapped buffer + * @miter: sg mapping iter + * @port: PIO port - IO or MMIO address + * @count: number of 32-bit words to transfer + * + * Description: + * Writes @count 32-bit words to register @port from buffer iterated + * through @miter. If buffer ends before @count words are written + * missing data is replaced by zeroes. @miter is advanced by 4*@count + * bytes or to the buffer's end whichever is closer. + * + * Context: + * IRQ disabled if the SG_MITER_ATOMIC is set. Don't care otherwise. + */ +static inline void cb710_sg_dwiter_read_to_io(struct sg_mapping_iter *miter, + void __iomem *port, size_t count) +{ + while (count-- > 0) + iowrite32(cb710_sg_dwiter_read_next_block(miter), port); +} + +#endif /* LINUX_CB710_SG_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 19f8e6d1a4d2..9f36e1cdbf01 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2127,6 +2127,7 @@ #define PCI_VENDOR_ID_MAINPINE 0x1522 #define PCI_DEVICE_ID_MAINPINE_PBRIDGE 0x0100 #define PCI_VENDOR_ID_ENE 0x1524 +#define PCI_DEVICE_ID_ENE_CB710_FLASH 0x0510 #define PCI_DEVICE_ID_ENE_CB712_SD 0x0550 #define PCI_DEVICE_ID_ENE_CB712_SD_2 0x0551 #define PCI_DEVICE_ID_ENE_CB714_SD 0x0750 -- cgit v1.2.3 From 9bf69a26ad9ccdc49469402275204271b3336ab6 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Thu, 4 Jun 2009 08:00:40 +0200 Subject: cb710: handle DEBUG define in Makefile Signed-off-by: Pierre Ossman --- include/linux/cb710.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/cb710.h b/include/linux/cb710.h index c3819e1ce1c6..a09213b52616 100644 --- a/include/linux/cb710.h +++ b/include/linux/cb710.h @@ -10,10 +10,6 @@ #ifndef LINUX_CB710_DRIVER_H #define LINUX_CB710_DRIVER_H -#ifdef CONFIG_CB710_DEBUG -#define DEBUG -#endif - /* verify assumptions on platform_device framework */ #define CONFIG_CB710_DEBUG_ASSUMPTIONS -- cgit v1.2.3 From c54f6bc67a4398243682f7438a2129906e127d21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 13 Jun 2009 12:37:59 +0200 Subject: cb710: more cleanup for the DEBUG case. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: Pierre Ossman --- include/linux/cb710.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/cb710.h b/include/linux/cb710.h index a09213b52616..63bc9a4d2926 100644 --- a/include/linux/cb710.h +++ b/include/linux/cb710.h @@ -10,9 +10,6 @@ #ifndef LINUX_CB710_DRIVER_H #define LINUX_CB710_DRIVER_H -/* verify assumptions on platform_device framework */ -#define CONFIG_CB710_DEBUG_ASSUMPTIONS - #include #include #include -- cgit v1.2.3 From f0e46cc4971f6be96010d9248e0fc076b229d989 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 4 Jun 2009 20:12:31 +0200 Subject: MFD,mmc: tmio_mmc: make HCLK configurable The Toshiba parts all have a 24 MHz HCLK, but HTC ASIC3 has a 24.576 MHz HCLK and AMD Imageon w228x's HCLK is 80 MHz. With this patch, the MFD driver provides the HCLK frequency to tmio_mmc via mfd_cell->driver_data. Signed-off-by: Philipp Zabel Acked-by: Ian Molton Acked-by: Samuel Ortiz Signed-off-by: Pierre Ossman --- include/linux/mfd/tmio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 516d955ab8a1..c377118884e6 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -18,6 +18,13 @@ writew((val) >> 16, (addr) + 2); \ } while (0) +/* + * data for the MMC controller + */ +struct tmio_mmc_data { + unsigned int hclk; +}; + /* * data for the NAND controller */ -- cgit v1.2.3 From 7923f90fffa8746f6457d4eea2109fd3d6414189 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 14 Jun 2009 22:10:41 +0200 Subject: vmlinux.lds.h update Updated after review by Tim Abbott. - Use HEAD_TEXT_SECTION - Drop use of section-names.h and delete file - Introduce EXIT_CALL Deleting section-names.h required a few simple updates of init.h Signed-off-by: Sam Ravnborg Cc: Tim Abbott --- include/asm-generic/vmlinux.lds.h | 17 ++++++++++------- include/linux/init.h | 4 +--- include/linux/section-names.h | 6 ------ 3 files changed, 11 insertions(+), 16 deletions(-) delete mode 100644 include/linux/section-names.h (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index fba42236e942..7381f701f3f3 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -12,7 +12,7 @@ * { * . = START; * __init_begin = .; - * HEAD_SECTION + * HEAD_TEXT_SECTION * INIT_TEXT_SECTION(PAGE_SIZE) * INIT_DATA_SECTION(...) * PERCPU(PAGE_SIZE) @@ -38,7 +38,7 @@ * /DISCARD/ : { * EXIT_TEXT * EXIT_DATA - * *(.exitcall.exit) + * EXIT_CALL * } * STABS_DEBUG * DWARF_DEBUG @@ -52,7 +52,6 @@ * Examples are: [__initramfs_start, __initramfs_end] for initramfs and * [__nosave_begin, __nosave_end] for the nosave data */ - #include #ifndef LOAD_OFFSET #define LOAD_OFFSET 0 @@ -414,9 +413,9 @@ #endif /* Section used for early init (in .S files) */ -#define HEAD_TEXT *(HEAD_TEXT_SECTION) +#define HEAD_TEXT *(.head.text) -#define HEAD_SECTION \ +#define HEAD_TEXT_SECTION \ .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { \ HEAD_TEXT \ } @@ -473,6 +472,9 @@ CPU_DISCARD(exit.text) \ MEM_DISCARD(exit.text) +#define EXIT_CALL \ + *(.exitcall.exit) + /* * bss (Block Started by Symbol) - uninitialized data * zeroed during startup @@ -692,7 +694,7 @@ * NOSAVE_DATA starts and ends with a PAGE_SIZE alignment which * matches the requirment of PAGE_ALIGNED_DATA. * -/* use 0 as page_align if page_aligned data is not used */ + * use 0 as page_align if page_aligned data is not used */ #define RW_DATA_SECTION(cacheline, nosave, pagealigned, inittask) \ . = ALIGN(PAGE_SIZE); \ .data : AT(ADDR(.data) - LOAD_OFFSET) { \ @@ -726,4 +728,5 @@ #define BSS_SECTION(sbss_align, bss_align) \ SBSS \ BSS(bss_align) \ - . = ALIGN(4); \ + . = ALIGN(4); + diff --git a/include/linux/init.h b/include/linux/init.h index 9f70c9f25d4b..b2189803f19a 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -2,8 +2,6 @@ #define _LINUX_INIT_H #include -#include -#include /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) @@ -101,7 +99,7 @@ #define __memexitconst __section(.memexit.rodata) /* For assembly routines */ -#define __HEAD .section __stringify(HEAD_TEXT_SECTION),"ax" +#define __HEAD .section ".head.text","ax" #define __INIT .section ".init.text","ax" #define __FINIT .previous diff --git a/include/linux/section-names.h b/include/linux/section-names.h deleted file mode 100644 index c956f4eb2adf..000000000000 --- a/include/linux/section-names.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_SECTION_NAMES_H -#define __LINUX_SECTION_NAMES_H - -#define HEAD_TEXT_SECTION .head.text - -#endif /* !__LINUX_SECTION_NAMES_H */ -- cgit v1.2.3 From 5a7e3d1281bbc4404b250b4a18d3ecb07c77640c Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sat, 13 Jun 2009 14:52:33 +0200 Subject: keyboard: advertise KT_DEAD2 extended diacriticals In addition to KT_DEAD which has limited support for diacriticals, there is KT_DEAD2 that can support 256 criticals, so let's advertise it in . This lets userland know abut the drivers/char/keyboard.c function k_dead2, which supports more than the few trivial ones that k_dead supports. Signed-off-by: Samuel Thibault Signed-off-by: Linus Torvalds --- include/linux/keyboard.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/keyboard.h b/include/linux/keyboard.h index a3c984d780f0..33a63f62d57f 100644 --- a/include/linux/keyboard.h +++ b/include/linux/keyboard.h @@ -56,6 +56,7 @@ extern int unregister_keyboard_notifier(struct notifier_block *nb); #define KT_ASCII 9 #define KT_LOCK 10 #define KT_SLOCK 12 +#define KT_DEAD2 13 #define KT_BRL 14 #define K(t,v) (((t)<<8)|(v)) -- cgit v1.2.3