From 4365a5676fa3aa1d5ae6c90c22a0044f09ba584e Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 15 Dec 2009 16:45:33 -0800 Subject: oom-kill: fix NUMA constraint check with nodemask Fix node-oriented allocation handling in oom-kill.c I myself think of this as a bugfix not as an ehnancement. In these days, things are changed as - alloc_pages() eats nodemask as its arguments, __alloc_pages_nodemask(). - mempolicy don't maintain its own private zonelists. (And cpuset doesn't use nodemask for __alloc_pages_nodemask()) So, current oom-killer's check function is wrong. This patch does - check nodemask, if nodemask && nodemask doesn't cover all node_states[N_HIGH_MEMORY], this is CONSTRAINT_MEMORY_POLICY. - Scan all zonelist under nodemask, if it hits cpuset's wall this faiulre is from cpuset. And - modifies the caller of out_of_memory not to call oom if __GFP_THISNODE. This doesn't change "current" behavior. If callers use __GFP_THISNODE it should handle "page allocation failure" by itself. - handle __GFP_NOFAIL+__GFP_THISNODE path. This is something like a FIXME but this gfpmask is not used now. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KAMEZAWA Hiroyuki Acked-by: David Rientjes Cc: Daisuke Nishimura Cc: KOSAKI Motohiro Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/oom.h b/include/linux/oom.h index 6aac5fe4f6f1..537662315627 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -10,6 +10,7 @@ #ifdef __KERNEL__ #include +#include struct zonelist; struct notifier_block; @@ -26,7 +27,8 @@ enum oom_constraint { extern int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_flags); extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); -extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); +extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, + int order, nodemask_t *mask); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 35570ac6039ef490b9c5abde1fee4803a39bf4e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20R=C3=B6jfors?= Date: Tue, 15 Dec 2009 16:46:18 -0800 Subject: gpio: add GPIO driver for the Timberdale FPGA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A GPIO driver for the Timberdale FPGA found on the Intel Atom board Russellville. The GPIO driver also has an IRQ-chip to support interrupts on the pins. Signed-off-by: Richard Röjfors Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timb_gpio.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/linux/timb_gpio.h (limited to 'include') diff --git a/include/linux/timb_gpio.h b/include/linux/timb_gpio.h new file mode 100644 index 000000000000..ce456eaae861 --- /dev/null +++ b/include/linux/timb_gpio.h @@ -0,0 +1,37 @@ +/* + * timb_gpio.h timberdale FPGA GPIO driver, platform data definition + * Copyright (c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _LINUX_TIMB_GPIO_H +#define _LINUX_TIMB_GPIO_H + +/** + * struct timbgpio_platform_data - Platform data of the Timberdale GPIO driver + * @gpio_base The number of the first GPIO pin, set to -1 for + * dynamic number allocation. + * @nr_pins Number of pins that is supported by the hardware (1-32) + * @irq_base If IRQ is supported by the hardware, this is the base + * number of IRQ:s. One IRQ per pin will be used. Set to + * -1 if IRQ:s is not supported. + */ +struct timbgpio_platform_data { + int gpio_base; + int nr_pins; + int irq_base; +}; + +#endif -- cgit v1.2.3 From 0769746183caff9d4334be48c7b0e7d2ec8716c4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 15 Dec 2009 16:46:20 -0800 Subject: gpiolib: add support for changing value polarity in sysfs Drivers may use gpiolib sysfs as part of their public user space interface. The GPIO number and polarity might change from board to board. The gpio_export_link() call can be used to hide the GPIO number from user space. Add support for also hiding the GPIO line polarity changes from user space. Signed-off-by: Jani Nikula Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/gpio.h | 6 ++++++ include/linux/gpio.h | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 204bed37e82d..485eeb6c4ef3 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -145,6 +145,7 @@ extern int __gpio_to_irq(unsigned gpio); extern int gpio_export(unsigned gpio, bool direction_may_change); extern int gpio_export_link(struct device *dev, const char *name, unsigned gpio); +extern int gpio_sysfs_set_active_low(unsigned gpio, int value); extern void gpio_unexport(unsigned gpio); #endif /* CONFIG_GPIO_SYSFS */ @@ -197,6 +198,11 @@ static inline int gpio_export_link(struct device *dev, const char *name, return -ENOSYS; } +static inline int gpio_sysfs_set_active_low(unsigned gpio, int value) +{ + return -ENOSYS; +} + static inline void gpio_unexport(unsigned gpio) { } diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 059bd189d35d..4e949a5b5b85 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -99,6 +99,12 @@ static inline int gpio_export_link(struct device *dev, const char *name, return -EINVAL; } +static inline int gpio_sysfs_set_active_low(unsigned gpio, int value) +{ + /* GPIO can never have been requested */ + WARN_ON(1); + return -EINVAL; +} static inline void gpio_unexport(unsigned gpio) { -- cgit v1.2.3 From 9265576daeab1a884b11cc4c1087b72b488ca2e3 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Tue, 15 Dec 2009 16:46:35 -0800 Subject: sm501: implement acceleration features This patch provides the acceleration entry points for the SM501 framebuffer driver. This patch provides the sync, copyarea and fillrect entry points, using the SM501's 2D acceleration engine to perform the operations in-chip rather than across the bus. Signed-off-by: Ben Dooks Signed-off-by: Simtec Linux Team Signed-off-by: Vincent Sanders Cc: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sm501-regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sm501-regs.h b/include/linux/sm501-regs.h index d53642d2d899..67ed2c542831 100644 --- a/include/linux/sm501-regs.h +++ b/include/linux/sm501-regs.h @@ -31,6 +31,8 @@ #define SM501_SYSCTRL_PCI_SUBSYS_LOCK (1<<11) #define SM501_SYSCTRL_PCI_BURST_READ_EN (1<<15) +#define SM501_SYSCTRL_2D_ENGINE_STATUS (1<<19) + /* miscellaneous control */ #define SM501_MISC_CONTROL (0x000004) -- cgit v1.2.3 From 3611380490c6ce27a2277709a34b8c5531524caf Mon Sep 17 00:00:00 2001 From: Chaithrika U S Date: Tue, 15 Dec 2009 16:46:38 -0800 Subject: davinci: fb: update the driver in preparation for addition of power management features Add a helper function to enable raster. Also add one member in the private data structure to track the current blank status, another function pointer which takes in the platform specific callback function to control panel power. These updates will help in adding suspend/resume and frame buffer blank operation features. Signed-off-by: Chaithrika U S Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/video/da8xx-fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/video/da8xx-fb.h b/include/video/da8xx-fb.h index c051a50ed528..89d43b3d4cb9 100644 --- a/include/video/da8xx-fb.h +++ b/include/video/da8xx-fb.h @@ -38,6 +38,7 @@ struct da8xx_lcdc_platform_data { const char manu_name[10]; void *controller_data; const char type[25]; + void (*panel_power_ctrl)(int); }; struct lcd_ctrl_config { -- cgit v1.2.3 From 904e812931f001b984912b2d2f653ea69520313c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 15 Dec 2009 16:46:52 -0800 Subject: reiserfs: remove /proc/fs/reiserfs/version /proc/fs/reiserfs/version is on the way of removing ->read_proc interface. It's empty however, so simply remove it instead of doing dummy conversion. It's hard to see what information userspace can extract from empty file. Signed-off-by: Alexey Dobriyan Cc: Jeff Mahoney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/reiserfs_fs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index a05b4a20768d..f65ed0d3a920 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2061,13 +2061,8 @@ struct dentry *reiserfs_get_parent(struct dentry *); int reiserfs_proc_info_init(struct super_block *sb); int reiserfs_proc_info_done(struct super_block *sb); -struct proc_dir_entry *reiserfs_proc_register_global(char *name, - read_proc_t * func); -void reiserfs_proc_unregister_global(const char *name); int reiserfs_proc_info_global_init(void); int reiserfs_proc_info_global_done(void); -int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset, - int count, int *eof, void *data); #if defined( REISERFS_PROC_INFO ) -- cgit v1.2.3 From e3c96f53ac132743fda1384910feb863a2eab916 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 15 Dec 2009 16:46:54 -0800 Subject: reiserfs: don't compile procfs.o at all if no support * small define cleanup in header * fix #ifdeffery in procfs.c via Kconfig Signed-off-by: Alexey Dobriyan Cc: Jeff Mahoney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/reiserfs_fs.h | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index f65ed0d3a920..c96c1858fe2c 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2051,21 +2051,13 @@ void set_de_name_and_namelen(struct reiserfs_dir_entry *de); int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, struct treepath *path, struct reiserfs_dir_entry *de); struct dentry *reiserfs_get_parent(struct dentry *); -/* procfs.c */ - -#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO ) -#define REISERFS_PROC_INFO -#else -#undef REISERFS_PROC_INFO -#endif +#ifdef CONFIG_REISERFS_PROC_INFO int reiserfs_proc_info_init(struct super_block *sb); int reiserfs_proc_info_done(struct super_block *sb); int reiserfs_proc_info_global_init(void); int reiserfs_proc_info_global_done(void); -#if defined( REISERFS_PROC_INFO ) - #define PROC_EXP( e ) e #define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data @@ -2079,6 +2071,26 @@ int reiserfs_proc_info_global_done(void); PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) ); \ PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) ) #else +static inline int reiserfs_proc_info_init(struct super_block *sb) +{ + return 0; +} + +static inline int reiserfs_proc_info_done(struct super_block *sb) +{ + return 0; +} + +static inline int reiserfs_proc_info_global_init(void) +{ + return 0; +} + +static inline int reiserfs_proc_info_global_done(void) +{ + return 0; +} + #define PROC_EXP( e ) #define VOID_V ( ( void ) 0 ) #define PROC_INFO_MAX( sb, field, value ) VOID_V -- cgit v1.2.3 From 569b846df54ffb2827b83ce3244c5f032394cba4 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 15 Dec 2009 16:47:03 -0800 Subject: memcg: coalesce uncharge during unmap/truncate In massive parallel enviroment, res_counter can be a performance bottleneck. One strong techinque to reduce lock contention is reducing calls by coalescing some amount of calls into one. Considering charge/uncharge chatacteristic, - charge is done one by one via demand-paging. - uncharge is done by - in chunk at munmap, truncate, exit, execve... - one by one via vmscan/paging. It seems we have a chance to coalesce uncharges for improving scalability at unmap/truncation. This patch is a for coalescing uncharge. For avoiding scattering memcg's structure to functions under /mm, this patch adds memcg batch uncharge information to the task. A reason for per-task batching is for making use of caller's context information. We do batched uncharge (deleyed uncharge) when truncation/unmap occurs but do direct uncharge when uncharge is called by memory reclaim (vmscan.c). The degree of coalescing depends on callers - at invalidate/trucate... pagevec size - at unmap ....ZAP_BLOCK_SIZE (memory itself will be freed in this degree.) Then, we'll not coalescing too much. On x86-64 8cpu server, I tested overheads of memcg at page fault by running a program which does map/fault/unmap in a loop. Running a task per a cpu by taskset and see sum of the number of page faults in 60secs. [without memcg config] 40156968 page-faults # 0.085 M/sec ( +- 0.046% ) 27.67 cache-miss/faults [root cgroup] 36659599 page-faults # 0.077 M/sec ( +- 0.247% ) 31.58 miss/faults [in a child cgroup] 18444157 page-faults # 0.039 M/sec ( +- 0.133% ) 69.96 miss/faults [child with this patch] 27133719 page-faults # 0.057 M/sec ( +- 0.155% ) 47.16 miss/faults We can see some amounts of improvement. (root cgroup doesn't affected by this patch) Another patch for "charge" will follow this and above will be improved more. Changelog(since 2009/10/02): - renamed filed of memcg_batch (as pages to bytes, memsw to memsw_bytes) - some clean up and commentary/description updates. - added initialize code to copy_process(). (possible bug fix) Changelog(old): - fixed !CONFIG_MEM_CGROUP case. - rebased onto the latest mmotm + softlimit fix patches. - unified patch for callers - added commetns. - make ->do_batch as bool. - removed css_get() at el. We don't need it. Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 13 +++++++++++++ include/linux/sched.h | 8 ++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bf9213b2db8f..91300c972e76 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -54,6 +54,11 @@ extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru); extern void mem_cgroup_del_lru(struct page *page); extern void mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to); + +/* For coalescing uncharge for reducing memcg' overhead*/ +extern void mem_cgroup_uncharge_start(void); +extern void mem_cgroup_uncharge_end(void); + extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); extern int mem_cgroup_shmem_charge_fallback(struct page *page, @@ -151,6 +156,14 @@ static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr) { } +static inline void mem_cgroup_uncharge_start(void) +{ +} + +static inline void mem_cgroup_uncharge_end(void) +{ +} + static inline void mem_cgroup_uncharge_page(struct page *page) { } diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c858f38e81a..f4c145410a8d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1544,6 +1544,14 @@ struct task_struct { unsigned long trace_recursion; #endif /* CONFIG_TRACING */ unsigned long stack_start; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ + struct memcg_batch_info { + int do_batch; /* incremented when batch uncharge started */ + struct mem_cgroup *memcg; /* target memcg of uncharge */ + unsigned long bytes; /* uncharged usage */ + unsigned long memsw_bytes; /* uncharged mem+swap usage */ + } memcg_batch; +#endif }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ -- cgit v1.2.3 From d8046582d5ee24448800e71c6933fdb6813aa062 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 15 Dec 2009 16:47:09 -0800 Subject: memcg: make memcg's file mapped consistent with global VM In global VM, FILE_MAPPED is used but memcg uses MAPPED_FILE. This makes grep difficult. Replace memcg's MAPPED_FILE with FILE_MAPPED And in global VM, mapped shared memory is accounted into FILE_MAPPED. But memcg doesn't. fix it. Note: page_is_file_cache() just checks SwapBacked or not. So, we need to check PageAnon. Cc: Balbir Singh Reviewed-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 91300c972e76..0b46c2068b96 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -122,7 +122,7 @@ static inline bool mem_cgroup_disabled(void) } extern bool mem_cgroup_oom_called(struct task_struct *task); -void mem_cgroup_update_mapped_file_stat(struct page *page, int val); +void mem_cgroup_update_file_mapped(struct page *page, int val); unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask, int nid, int zid); @@ -287,7 +287,7 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void mem_cgroup_update_mapped_file_stat(struct page *page, +static inline void mem_cgroup_update_file_mapped(struct page *page, int val) { } -- cgit v1.2.3 From 57f9fd7d25ac9a0d7e3a4ced580e780ab4524e3b Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Tue, 15 Dec 2009 16:47:11 -0800 Subject: memcg: cleanup mem_cgroup_move_parent() mem_cgroup_move_parent() calls try_charge first and cancel_charge on failure. IMHO, charge/uncharge(especially charge) is high cost operation, so we should avoid it as far as possible. This patch tries to delay try_charge in mem_cgroup_move_parent() by re-ordering checks it does. And this patch renames mem_cgroup_move_account() to __mem_cgroup_move_account(), changes the return value of __mem_cgroup_move_account() from int to void, and adds a new wrapper(mem_cgroup_move_account()), which checks whether a @pc is valid for moving account and calls __mem_cgroup_move_account(). This patch removes the last caller of trylock_page_cgroup(), so removes its definition too. Signed-off-by: Daisuke Nishimura Acked-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 4b938d4f3ac2..b0e4eb126236 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -57,6 +57,8 @@ static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \ { return test_and_clear_bit(PCG_##lname, &pc->flags); } +TESTPCGFLAG(Locked, LOCK) + /* Cache flag is set only once (at allocation) */ TESTPCGFLAG(Cache, CACHE) CLEARPCGFLAG(Cache, CACHE) @@ -86,11 +88,6 @@ static inline void lock_page_cgroup(struct page_cgroup *pc) bit_spin_lock(PCG_LOCK, &pc->flags); } -static inline int trylock_page_cgroup(struct page_cgroup *pc) -{ - return bit_spin_trylock(PCG_LOCK, &pc->flags); -} - static inline void unlock_page_cgroup(struct page_cgroup *pc) { bit_spin_unlock(PCG_LOCK, &pc->flags); -- cgit v1.2.3 From c6a47cc2ccf9649ee09eeddd70a6d061bde69568 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Dec 2009 16:47:15 -0800 Subject: ptrace: cleanup ptrace_init_task()->ptrace_link() path No functional changes. ptrace_init_task() looks confusing, as if we always auto-attach when "bool ptrace" argument is true, while in fact we attach only if current is traced. Make the code more explicit and kill now unused ptrace_link(). Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 7456d7d87a19..1951805df63a 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -105,12 +105,7 @@ static inline int ptrace_reparented(struct task_struct *child) { return child->real_parent != child->parent; } -static inline void ptrace_link(struct task_struct *child, - struct task_struct *new_parent) -{ - if (unlikely(child->ptrace)) - __ptrace_link(child, new_parent); -} + static inline void ptrace_unlink(struct task_struct *child) { if (unlikely(child->ptrace)) @@ -169,9 +164,9 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) INIT_LIST_HEAD(&child->ptraced); child->parent = child->real_parent; child->ptrace = 0; - if (unlikely(ptrace)) { + if (unlikely(ptrace) && (current->ptrace & PT_PTRACED)) { child->ptrace = current->ptrace; - ptrace_link(child, current->parent); + __ptrace_link(child, current->parent); } } -- cgit v1.2.3 From 85ec7fd9f8e528c4f61d595cfe4df7681a19f252 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Dec 2009 16:47:17 -0800 Subject: ptrace: introduce user_single_step_siginfo() helper Suggested by Roland. Currently there is no way to synthesize a single-stepping trap in the arch-independent manner. This patch adds the default helper which fills siginfo_t, arch/ can can override it. Architetures which implement user_enable_single_step() should add user_single_step_siginfo() also. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 1951805df63a..56f2d63a5cbb 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -273,6 +273,18 @@ static inline void user_enable_block_step(struct task_struct *task) } #endif /* arch_has_block_step */ +#ifdef ARCH_HAS_USER_SINGLE_STEP_INFO +extern void user_single_step_siginfo(struct task_struct *tsk, + struct pt_regs *regs, siginfo_t *info); +#else +static inline void user_single_step_siginfo(struct task_struct *tsk, + struct pt_regs *regs, siginfo_t *info) +{ + memset(info, 0, sizeof(*info)); + info->si_signo = SIGTRAP; +} +#endif + #ifndef arch_ptrace_stop_needed /** * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called -- cgit v1.2.3 From 2f0edac5555983dc28033acce8a355f588fd01b2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Dec 2009 16:47:19 -0800 Subject: ptrace: change tracehook_report_syscall_exit() to handle stepping Suggested by Roland. Change tracehook_report_syscall_exit() to look at step flag and send the trap signal if needed. This change affects ia64, microblaze, parisc, powerpc, sh. They pass nonzero "step" argument to tracehook but since it was ignored the tracee reports via ptrace_notify(), this is not right and not consistent. - PTRACE_SETSIGINFO doesn't work - if the tracer resumes the tracee with signr != 0 the new signal is generated rather than delivering it - If PT_TRACESYSGOOD is set the tracee reports the wrong exit_code I don't have a powerpc machine, but I think this test-case should see the difference: #include #include #include #include #include int main(void) { int pid, status; if (!(pid = fork())) { assert(ptrace(PTRACE_TRACEME) == 0); kill(getpid(), SIGSTOP); getppid(); return 0; } assert(pid == wait(&status)); assert(ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_TRACESYSGOOD) == 0); assert(ptrace(PTRACE_SYSCALL, pid, 0,0) == 0); assert(pid == wait(&status)); assert(ptrace(PTRACE_SINGLESTEP, pid, 0,0) == 0); assert(pid == wait(&status)); if (status == 0x57F) return 0; printf("kernel bug: status=%X shouldn't have 0x80\n", status); return 1; } Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 1eb44a924e56..10db0102a890 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -134,6 +134,13 @@ static inline __must_check int tracehook_report_syscall_entry( */ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) { + if (step) { + siginfo_t info; + user_single_step_siginfo(current, regs, &info); + force_sig_info(SIGTRAP, &info, current); + return; + } + ptrace_report_syscall(regs); } -- cgit v1.2.3 From 614c517d7c00af1b26ded20646b329397d6f51a1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Dec 2009 16:47:22 -0800 Subject: signals: SEND_SIG_NOINFO should be considered as SI_FROMUSER() No changes in compiled code. The patch adds the new helper, si_fromuser() and changes check_kill_permission() to use this helper. The real effect of this patch is that from now we "officially" consider SEND_SIG_NOINFO signal as "from user-space" signals. This is already true if we look at the code which uses SEND_SIG_NOINFO, except __send_signal() has another opinion - see the next patch. The naming of these special SEND_SIG_XXX siginfo's is really bad imho. From __send_signal()'s pov they mean SEND_SIG_NOINFO from user SEND_SIG_PRIV from kernel SEND_SIG_FORCED no info Signed-off-by: Oleg Nesterov Cc: Roland McGrath Reviewed-by: Sukadev Bhattiprolu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f4c145410a8d..57b3516f055b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2102,11 +2102,6 @@ static inline int kill_cad_pid(int sig, int priv) #define SEND_SIG_PRIV ((struct siginfo *) 1) #define SEND_SIG_FORCED ((struct siginfo *) 2) -static inline int is_si_special(const struct siginfo *info) -{ - return info <= SEND_SIG_FORCED; -} - /* * True if we are on the alternate signal stack. */ -- cgit v1.2.3 From ad09750b51150ca87531b8790a379214a974c167 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Dec 2009 16:47:25 -0800 Subject: signals: kill force_sig_specific() Kill force_sig_specific(), this trivial wrapper has no callers. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: Sukadev Bhattiprolu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 57b3516f055b..244c287a5ac1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2083,7 +2083,6 @@ extern int kill_proc_info(int, struct siginfo *, pid_t); extern int do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int, struct task_struct *); -extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); extern void zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); -- cgit v1.2.3 From b97e820ffffbf49e94ed60c9c26f1a54bccae924 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Tue, 15 Dec 2009 16:47:32 -0800 Subject: ipc/sem.c: add a per-semaphore pending list Based on Nick's findings: sysv sem has the concept of semaphore arrays that consist out of multiple semaphores. Atomic operations that affect multiple semaphores are supported. The patch is the first step for optimizing simple, single semaphore operations: In addition to the global list of all pending operations, a 2nd, per-semaphore list with the simple operations is added. Note: this patch does not make sense by itself, the new list is used nowhere. Signed-off-by: Manfred Spraul Cc: Nick Piggin Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sem.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sem.h b/include/linux/sem.h index 1b191c176bcd..8a4adbef8a0f 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -86,6 +86,7 @@ struct task_struct; struct sem { int semval; /* current value */ int sempid; /* pid of last operation */ + struct list_head sem_pending; /* pending single-sop operations */ }; /* One sem_array data structure for each set of semaphores in the system. */ @@ -96,11 +97,13 @@ struct sem_array { struct sem *sem_base; /* ptr to first semaphore in array */ struct list_head sem_pending; /* pending operations to be processed */ struct list_head list_id; /* undo requests on this array */ - unsigned long sem_nsems; /* no. of semaphores in array */ + int sem_nsems; /* no. of semaphores in array */ + int complex_count; /* pending complex operations */ }; /* One queue for each sleeping process in the system. */ struct sem_queue { + struct list_head simple_list; /* queue of pending operations */ struct list_head list; /* queue of pending operations */ struct task_struct *sleeper; /* this process */ struct sem_undo *undo; /* undo structure */ -- cgit v1.2.3 From 9cf18e1dd74cd0061d58ac55029784ca3dd88f6a Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 15 Dec 2009 16:47:36 -0800 Subject: ipc: HARD_MSGMAX should be higher not lower on 64bit We have HARD_MSGMAX lower on 64bit than on 32bit, since usually 64bit machines have more memory than 32bit machines. Making it higher on 64bit seems reasonable, and keep the original number on 32bit. Acked-by: Serge E. Hallyn Cc: Cedric Le Goater Signed-off-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e408722a84c7..07baa38bce37 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -87,7 +87,7 @@ extern int mq_init_ns(struct ipc_namespace *ns); /* default values */ #define DFLT_QUEUESMAX 256 /* max number of message queues */ #define DFLT_MSGMAX 10 /* max number of messages in each queue */ -#define HARD_MSGMAX (131072/sizeof(void *)) +#define HARD_MSGMAX (32768*sizeof(void *)/4) #define DFLT_MSGSIZEMAX 8192 /* max message size */ #else static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } -- cgit v1.2.3 From 06a7f711246b081afc21fff859f1003f1f2a0fbc Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 15 Dec 2009 16:47:46 -0800 Subject: kexec: premit reduction of the reserved memory size Implement shrinking the reserved memory for crash kernel, if it is more than enough. For example, if you have already reserved 128M, now you just want 100M, you can do: # echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size Note, you can only do this before loading the crash kernel. Signed-off-by: WANG Cong Cc: Neil Horman Acked-by: Eric W. Biederman Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index adc34f2c6eff..c356b6914ffd 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -206,6 +206,8 @@ extern size_t vmcoreinfo_max_size; int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); +int crash_shrink_memory(unsigned long new_size); +size_t crash_get_memory_size(void); #else /* !CONFIG_KEXEC */ struct pt_regs; -- cgit v1.2.3 From fac046ad0b1ee2c4244ebf43a26433ef0ea29ae4 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 15 Dec 2009 16:47:47 -0800 Subject: aio: remove unused field Don't know the reason, but it appears ki_wait field of iocb never gets used. Signed-off-by: Shaohua Li Cc: Jeff Moyer Cc: Benjamin LaHaise Cc: Zach Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/aio.h b/include/linux/aio.h index aea219d7d8d1..811dbb369379 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -102,7 +102,6 @@ struct kiocb { } ki_obj; __u64 ki_user_data; /* user's data for completion */ - wait_queue_t ki_wait; loff_t ki_pos; void *private; @@ -140,7 +139,6 @@ struct kiocb { (x)->ki_dtor = NULL; \ (x)->ki_obj.tsk = tsk; \ (x)->ki_user_data = 0; \ - init_wait((&(x)->ki_wait)); \ } while (0) #define AIO_RING_MAGIC 0xa10a10a1 @@ -223,8 +221,6 @@ struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } #endif /* CONFIG_AIO */ -#define io_wait_to_kiocb(wait) container_of(wait, struct kiocb, ki_wait) - static inline struct kiocb *list_kiocb(struct list_head *h) { return list_entry(h, struct kiocb, ki_list); -- cgit v1.2.3 From 5fe878ae7f82fbf0830dbfaee4c5ca18f3aee442 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Dec 2009 16:47:50 -0800 Subject: direct-io: cleanup blockdev_direct_IO locking Currently the locking in blockdev_direct_IO is a mess, we have three different locking types and very confusing checks for some of them. The most complicated one is DIO_OWN_LOCKING for reads, which happens to not actually be used. This patch gets rid of the DIO_OWN_LOCKING - as mentioned above the read case is unused anyway, and the write side is almost identical to DIO_NO_LOCKING. The difference is that DIO_NO_LOCKING always sets the create argument for the get_blocks callback to zero, but we can easily move that to the actual get_blocks callbacks. There are four users of the DIO_NO_LOCKING mode: gfs already ignores the create argument and thus is fine with the new version, ocfs2 only errors out if create were ever set, and we can remove this dead code now, the block device code only ever uses create for an error message if we are fully beyond the device which can never happen, and last but not least XFS will need the new behavour for writes. Now we can replace the lock_type variable with a flags one, where no flag means the DIO_NO_LOCKING behaviour and DIO_LOCKING is kept as the first flag. Separate out the check for not allowing to fill holes into a separate flag, although for now both flags always get set at the same time. Also revamp the documentation of the locking scheme to actually make sense. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Christoph Hellwig Cc: Dave Chinner Cc: Badari Pulavarty Cc: Jeff Moyer Cc: Jens Axboe Cc: Zach Brown Cc: Al Viro Cc: Alex Elder Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index a057f48eb156..b23a7018eb90 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2264,9 +2264,11 @@ ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, int lock_type); enum { - DIO_LOCKING = 1, /* need locking between buffered and direct access */ - DIO_NO_LOCKING, /* bdev; no locking at all between buffered/direct */ - DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */ + /* need locking between buffered and direct access */ + DIO_LOCKING = 0x01, + + /* filesystem does not support filling holes */ + DIO_SKIP_HOLES = 0x02, }; static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, @@ -2275,7 +2277,8 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_LOCKING); + nr_segs, get_block, end_io, + DIO_LOCKING | DIO_SKIP_HOLES); } static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, @@ -2284,16 +2287,7 @@ static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_NO_LOCKING); -} - -static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, - struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) -{ - return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_OWN_LOCKING); + nr_segs, get_block, end_io, 0); } #endif -- cgit v1.2.3 From f65380c07b64b0e45ad5c7d70ca54a9cde1c00db Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 15 Dec 2009 16:48:21 -0800 Subject: resource: constify arg to resource_size() and resource_type() resource_size() doesn't change the resource it operates on, so the res parameter can be marked const. Same for resource_type(). Signed-off-by: Jean Delvare Reviewed-by: WANG Cong Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 83aa81297ea3..7129504e053d 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -126,11 +126,11 @@ extern int allocate_resource(struct resource *root, struct resource *new, int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); -static inline resource_size_t resource_size(struct resource *res) +static inline resource_size_t resource_size(const struct resource *res) { return res->end - res->start + 1; } -static inline unsigned long resource_type(struct resource *res) +static inline unsigned long resource_type(const struct resource *res) { return res->flags & IORESOURCE_TYPE_BITS; } -- cgit v1.2.3 From c1a2a962a2ad103846e7950b4591471fabecece7 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 15 Dec 2009 16:48:25 -0800 Subject: bitmap: introduce bitmap_set, bitmap_clear, bitmap_find_next_zero_area This introduces new bitmap functions: bitmap_set: Set specified bit area bitmap_clear: Clear specified bit area bitmap_find_next_zero_area: Find free bit area These are mostly stolen from iommu helper. The differences are: - Use find_next_bit instead of doing test_bit for each bit - Rewrite bitmap_set and bitmap_clear Instead of setting or clearing for each bit. - Check the last bit of the limit iommu-helper doesn't want to find such area - The return value if there is no zero area find_next_zero_area in iommu helper: returns -1 bitmap_find_next_zero_area: return >= bitmap size Signed-off-by: Akinobu Mita Cc: FUJITA Tomonori Cc: "David S. Miller" Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Greg Kroah-Hartman Cc: Lothar Wassmann Cc: Roland Dreier Cc: Yevgeny Petrilin Cc: Tony Luck Cc: Fenghua Yu Cc: Joerg Roedel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 756d78b8c1c5..daf8c480c786 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -42,6 +42,9 @@ * bitmap_empty(src, nbits) Are all bits zero in *src? * bitmap_full(src, nbits) Are all bits set in *src? * bitmap_weight(src, nbits) Hamming Weight: number set bits + * bitmap_set(dst, pos, nbits) Set specified bit area + * bitmap_clear(dst, pos, nbits) Clear specified bit area + * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) @@ -108,6 +111,14 @@ extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern int __bitmap_weight(const unsigned long *bitmap, int bits); +extern void bitmap_set(unsigned long *map, int i, int len); +extern void bitmap_clear(unsigned long *map, int start, int nr); +extern unsigned long bitmap_find_next_zero_area(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned int nr, + unsigned long align_mask); + extern int bitmap_scnprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, -- cgit v1.2.3 From a66022c457755b5eef61e30866114679c95e1f54 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 15 Dec 2009 16:48:28 -0800 Subject: iommu-helper: use bitmap library Use bitmap library and kill some unused iommu helper functions. 1. s/iommu_area_free/bitmap_clear/ 2. s/iommu_area_reserve/bitmap_set/ 3. Use bitmap_find_next_zero_area instead of find_next_zero_area This cannot be simple substitution because find_next_zero_area doesn't check the last bit of the limit in bitmap 4. Remove iommu_area_free, iommu_area_reserve, and find_next_zero_area Signed-off-by: Akinobu Mita Cc: "David S. Miller" Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: FUJITA Tomonori Cc: Joerg Roedel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/iommu-helper.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index 3b068e5b5671..64d1b638745d 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -14,14 +14,11 @@ static inline unsigned long iommu_device_max_index(unsigned long size, extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, unsigned long shift, unsigned long boundary_size); -extern void iommu_area_reserve(unsigned long *map, unsigned long i, int len); extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long shift, unsigned long boundary_size, unsigned long align_mask); -extern void iommu_area_free(unsigned long *map, unsigned long start, - unsigned int nr); extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len, unsigned long io_page_size); -- cgit v1.2.3