From a75d377686037982cbec320bb770b19fe7be6a5d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 26 Oct 2010 14:21:10 -0700 Subject: types.h: move misplaced comment This comment landed in the wrong place. Cc: Andi Kleen Cc: Arnd Bergmann Cc: David Miller Cc: Eric Paris Cc: Jan Engelhardt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/types.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 357dbc19606f..c2a9eb44f2fa 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -121,15 +121,7 @@ typedef __u64 u_int64_t; typedef __s64 int64_t; #endif -/* - * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid - * common 32/64-bit compat problems. - * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other - * architectures) and to 8-byte boundaries on 64-bit architetures. The new - * aligned_64 type enforces 8-byte alignment so that structs containing - * aligned_64 values have the same alignment on 32-bit and 64-bit architectures. - * No conversions are necessary between 32-bit user-space and a 64-bit kernel. - */ +/* this is a special 64bit data type that is 8-byte aligned */ #define aligned_u64 __u64 __attribute__((aligned(8))) #define aligned_be64 __be64 __attribute__((aligned(8))) #define aligned_le64 __le64 __attribute__((aligned(8))) @@ -186,7 +178,15 @@ typedef __u64 __bitwise __be64; typedef __u16 __bitwise __sum16; typedef __u32 __bitwise __wsum; -/* this is a special 64bit data type that is 8-byte aligned */ +/* + * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid + * common 32/64-bit compat problems. + * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other + * architectures) and to 8-byte boundaries on 64-bit architetures. The new + * aligned_64 type enforces 8-byte alignment so that structs containing + * aligned_64 values have the same alignment on 32-bit and 64-bit architectures. + * No conversions are necessary between 32-bit user-space and a 64-bit kernel. + */ #define __aligned_u64 __u64 __attribute__((aligned(8))) #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) -- cgit v1.2.3 From 52c5171214ff3327961d0ce0db7e8d2ce55004fd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 26 Oct 2010 14:21:19 -0700 Subject: kfifo: disable __kfifo_must_check_helper() This helper is wrong: it coerces signed values into unsigned ones, so code such as if (kfifo_alloc(...) < 0) { error } will fail to detect the error. So let's disable __kfifo_must_check_helper() for 2.6.36. Cc: Randy Dunlap Cc: Stefani Seibold Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 62dbee554f60..c238ad2f82ea 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -171,11 +171,8 @@ struct kfifo_rec_ptr_2 __STRUCT_KFIFO_PTR(unsigned char, 2, void); } -static inline unsigned int __must_check -__kfifo_must_check_helper(unsigned int val) -{ - return val; -} +/* __kfifo_must_check_helper() is temporarily disabled because it was faulty */ +#define __kfifo_must_check_helper(x) (x) /** * kfifo_initialized - Check if the fifo is initialized -- cgit v1.2.3 From 3d5992d2ac7dc09aed8ab537cba074589f0f0a52 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Tue, 26 Oct 2010 14:21:23 -0700 Subject: oom: add per-mm oom disable count It's pointless to kill a task if another thread sharing its mm cannot be killed to allow future memory freeing. A subsequent patch will prevent kills in such cases, but first it's necessary to have a way to flag a task that shares memory with an OOM_DISABLE task that doesn't incur an additional tasklist scan, which would make select_bad_process() an O(n^2) function. This patch adds an atomic counter to struct mm_struct that follows how many threads attached to it have an oom_score_adj of OOM_SCORE_ADJ_MIN. They cannot be killed by the kernel, so their memory cannot be freed in oom conditions. This only requires task_lock() on the task that we're operating on, it does not require mm->mmap_sem since task_lock() pins the mm and the operation is atomic. [rientjes@google.com: changelog and sys_unshare() code] [rientjes@google.com: protect oom_disable_count with task_lock in fork] [rientjes@google.com: use old_mm for oom_disable_count in exec] Signed-off-by: Ying Han Signed-off-by: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index cb57d657ce4d..bb7288a782fd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -310,6 +310,8 @@ struct mm_struct { #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_mm *mmu_notifier_mm; #endif + /* How many tasks sharing this mm are OOM_DISABLE */ + atomic_t oom_disable_count; }; /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ -- cgit v1.2.3 From f19e77a3dc884510dba740caa6dee126b7d40156 Mon Sep 17 00:00:00 2001 From: zeal Date: Tue, 26 Oct 2010 14:21:27 -0700 Subject: include/linux/pageblock-flags.h: fix set_pageblock_flags() macro definiton The presently-unused macro was missing one parameter. Signed-off-by: zeal Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pageblock-flags.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e8c06122be36..19ef95d293ae 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -67,7 +67,8 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, #define get_pageblock_flags(page) \ get_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1) -#define set_pageblock_flags(page) \ - set_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1) +#define set_pageblock_flags(page, flags) \ + set_pageblock_flags_group(page, flags, \ + 0, NR_PAGEBLOCK_BITS-1) #endif /* PAGEBLOCK_FLAGS_H */ -- cgit v1.2.3 From e4455abb50a19562dbfdc51a8424fda9b588bd6d Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 26 Oct 2010 14:21:28 -0700 Subject: mm: only build per-node scan_unevictable functions when NUMA is enabled Non-NUMA systems do never create these files anyway, since they are only created by driver subsystem when NUMA is configured. [akpm@linux-foundation.org: cleanup] Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: KOSAKI Motohiro Cc: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7cdd63366f88..eba53e71d2cc 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -271,8 +271,18 @@ extern void scan_mapping_unevictable_pages(struct address_space *); extern unsigned long scan_unevictable_pages; extern int scan_unevictable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +#ifdef CONFIG_NUMA extern int scan_unevictable_register_node(struct node *node); extern void scan_unevictable_unregister_node(struct node *node); +#else +static inline int scan_unevictable_register_node(struct node *node) +{ + return 0; +} +static inline void scan_unevictable_unregister_node(struct node *node) +{ +} +#endif extern int kswapd_run(int nid); extern void kswapd_stop(int nid); -- cgit v1.2.3 From 49ac825587f33afec8841b7fab2eb4db775014e6 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 26 Oct 2010 14:21:30 -0700 Subject: memory hotplug: unify is_removable and offline detection code Now, sysfs interface of memory hotplug shows whether the section is removable or not. But it checks only migrateype of pages and doesn't check details of cluster of pages. Next, memory hotplug's set_migratetype_isolate() has the same kind of check, too. This patch adds the function __count_unmovable_pages() and makes above 2 checks to use the same logic. Then, is_removable and hotremove code uses the same logic. No changes in the hotremove logic itself. TODO: need to find a way to check RECLAMABLE. But, considering bit, calling shrink_slab() against a range before starting memory hotremove sounds better. If so, this patch's logic doesn't need to be changed. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KAMEZAWA Hiroyuki Reported-by: Michal Hocko Cc: Wu Fengguang Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 864035fb8f8a..4307231bd22f 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -70,6 +70,10 @@ extern void online_page(struct page *page); extern int online_pages(unsigned long, unsigned long); extern void __offline_isolated_pages(unsigned long, unsigned long); +#ifdef CONFIG_MEMORY_HOTREMOVE +extern bool is_pageblock_removable_nolock(struct page *page); +#endif /* CONFIG_MEMORY_HOTREMOVE */ + /* reasonably generic interface to expand the physical pages in a zone */ extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); -- cgit v1.2.3 From f629d1c9bd0dbc44a6c4f9a4a67d1646c42bfc6f Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Tue, 26 Oct 2010 14:21:33 -0700 Subject: mm: add account_page_writeback() To help developers and applications gain visibility into writeback behaviour this patch adds two counters to /proc/vmstat. # grep nr_dirtied /proc/vmstat nr_dirtied 3747 # grep nr_written /proc/vmstat nr_written 3618 These entries allow user apps to understand writeback behaviour over time and learn how it is impacting their performance. Currently there is no way to inspect dirty and writeback speed over time. It's not possible for nr_dirty/nr_writeback. These entries are necessary to give visibility into writeback behaviour. We have /proc/diskstats which lets us understand the io in the block layer. We have blktrace for more in depth understanding. We have e2fsprogs and debugsfs to give insight into the file systems behaviour, but we don't offer our users the ability understand what writeback is doing. There is no way to know how active it is over the whole system, if it's falling behind or to quantify it's efforts. With these values exported users can easily see how much data applications are sending through writeback and also at what rates writeback is processing this data. Comparing the rates of change between the two allow developers to see when writeback is not able to keep up with incoming traffic and the rate of dirty memory being sent to the IO back end. This allows folks to understand their io workloads and track kernel issues. Non kernel engineers at Google often use these counters to solve puzzling performance problems. Patch #4 adds a pernode vmstat file with nr_dirtied and nr_written Patch #5 add writeback thresholds to /proc/vmstat Currently these values are in debugfs. But they should be promoted to /proc since they are useful for developers who are writing databases and file servers and are not debugging the kernel. The output is as below: # grep threshold /proc/vmstat nr_pages_dirty_threshold 409111 nr_pages_dirty_background_threshold 818223 This patch: This allows code outside of the mm core to safely manipulate page writeback state and not worry about the other accounting. Not using these routines means that some code will lose track of the accounting and we get bugs. Modify nilfs2 to use interface. Signed-off-by: Michael Rubin Reviewed-by: KOSAKI Motohiro Reviewed-by: Wu Fengguang Cc: KONISHI Ryusuke Cc: Jiro SEKIBA Cc: Dave Chinner Cc: Jens Axboe Cc: KOSAKI Motohiro Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a4c66846fb8f..c36297faf7cb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -868,6 +868,7 @@ int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); void account_page_dirtied(struct page *page, struct address_space *mapping); +void account_page_writeback(struct page *page); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); -- cgit v1.2.3 From ea941f0e2a8c02ae876cd73deb4e1557248f258c Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Tue, 26 Oct 2010 14:21:35 -0700 Subject: writeback: add nr_dirtied and nr_written to /proc/vmstat To help developers and applications gain visibility into writeback behaviour adding two entries to vm_stat_items and /proc/vmstat. This will allow us to track the "written" and "dirtied" counts. # grep nr_dirtied /proc/vmstat nr_dirtied 3747 # grep nr_written /proc/vmstat nr_written 3618 Signed-off-by: Michael Rubin Reviewed-by: Wu Fengguang Cc: Dave Chinner Cc: Jens Axboe Cc: KOSAKI Motohiro Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3984c4eb41fd..c3c17fb675ee 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -104,6 +104,8 @@ enum zone_stat_item { NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ + NR_DIRTIED, /* page dirtyings since bootup */ + NR_WRITTEN, /* page writings since bootup */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ -- cgit v1.2.3 From bce54bbfde07e8b300f39dae14756c12a6ceca65 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Oct 2010 14:21:37 -0700 Subject: mm: fix typo in mm.h when NODE_NOT_IN_PAGE_FLAGS NODE_NOT_IN_PAGE_FLAGS is defined in mm.h when the node information is not stored in the page flags bitmap. Unfortunately, there's a typo in one of the checks for it. This patch fixes it (s/NODE_NOT_IN_PAGEFLAGS/NODE_NOT_IN_PAGE_FLAGS/). Since this has been around for ages, I doubt it's been causing any serious problems. Signed-off-by: Will Deacon Cc: Christoph Lameter Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c36297faf7cb..2862009f9573 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -497,8 +497,8 @@ static inline void set_compound_order(struct page *page, unsigned long order) #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) -/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allcator */ -#ifdef NODE_NOT_IN_PAGEFLAGS +/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ +#ifdef NODE_NOT_IN_PAGE_FLAGS #define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) #define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF)? \ SECTIONS_PGOFF : ZONES_PGOFF) -- cgit v1.2.3 From 0e093d99763eb4cea09f8ca4f1d01f34e121d10b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 26 Oct 2010 14:21:45 -0700 Subject: writeback: do not sleep on the congestion queue if there are no congested BDIs or if significant congestion is not being encountered in the current zone If congestion_wait() is called with no BDI congested, the caller will sleep for the full timeout and this may be an unnecessary sleep. This patch adds a wait_iff_congested() that checks congestion and only sleeps if a BDI is congested else, it calls cond_resched() to ensure the caller is not hogging the CPU longer than its quota but otherwise will not sleep. This is aimed at reducing some of the major desktop stalls reported during IO. For example, while kswapd is operating, it calls congestion_wait() but it could just have been reclaiming clean page cache pages with no congestion. Without this patch, it would sleep for a full timeout but after this patch, it'll just call schedule() if it has been on the CPU too long. Similar logic applies to direct reclaimers that are not making enough progress. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Minchan Kim Cc: Wu Fengguang Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Rik van Riel Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 2 +- include/linux/mmzone.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 35b00746c712..f1b402a50679 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -285,7 +285,7 @@ enum { void clear_bdi_congested(struct backing_dev_info *bdi, int sync); void set_bdi_congested(struct backing_dev_info *bdi, int sync); long congestion_wait(int sync, long timeout); - +long wait_iff_congested(struct zone *zone, int sync, long timeout); static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c3c17fb675ee..39c24ebe9cfd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -423,6 +423,9 @@ struct zone { typedef enum { ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */ ZONE_OOM_LOCKED, /* zone is in OOM killer zonelist */ + ZONE_CONGESTED, /* zone has many dirty pages backed by + * a congested BDI + */ } zone_flags_t; static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) @@ -440,6 +443,11 @@ static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag) clear_bit(flag, &zone->flags); } +static inline int zone_is_reclaim_congested(const struct zone *zone) +{ + return test_bit(ZONE_CONGESTED, &zone->flags); +} + static inline int zone_is_reclaim_locked(const struct zone *zone) { return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); -- cgit v1.2.3 From 61ecdb801ef2cd28e32442383106d7837d76deac Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2010 14:21:47 -0700 Subject: mm: strictly nested kmap_atomic() Ensure kmap_atomic() usage is strictly nested Signed-off-by: Peter Zijlstra Reviewed-by: Rik van Riel Acked-by: Chris Metcalf Cc: David Howells Cc: Hugh Dickins Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Russell King Cc: Ralf Baechle Cc: David Miller Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index e3060ef85b6d..283cd47bb34c 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -201,8 +201,8 @@ static inline void copy_user_highpage(struct page *to, struct page *from, vfrom = kmap_atomic(from, KM_USER0); vto = kmap_atomic(to, KM_USER1); copy_user_page(vto, vfrom, vaddr, to); - kunmap_atomic(vfrom, KM_USER0); kunmap_atomic(vto, KM_USER1); + kunmap_atomic(vfrom, KM_USER0); } #endif @@ -214,8 +214,8 @@ static inline void copy_highpage(struct page *to, struct page *from) vfrom = kmap_atomic(from, KM_USER0); vto = kmap_atomic(to, KM_USER1); copy_page(vto, vfrom); - kunmap_atomic(vfrom, KM_USER0); kunmap_atomic(vto, KM_USER1); + kunmap_atomic(vfrom, KM_USER0); } #endif /* _LINUX_HIGHMEM_H */ -- cgit v1.2.3 From 3e4d3af501cccdc8a8cca41bdbe57d54ad7e7e73 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2010 14:21:51 -0700 Subject: mm: stack based kmap_atomic() Keep the current interface but ignore the KM_type and use a stack based approach. The advantage is that we get rid of crappy code like: #define __KM_PTE \ (in_nmi() ? KM_NMI_PTE : \ in_irq() ? KM_IRQ_PTE : \ KM_PTE0) and in general can stop worrying about what context we're in and what kmap slots might be appropriate for that. The downside is that FRV kmap_atomic() gets more expensive. For now we use a CPP trick suggested by Andrew: #define kmap_atomic(page, args...) __kmap_atomic(page) to avoid having to touch all kmap_atomic() users in a single patch. [ not compiled on: - mn10300: the arch doesn't actually build with highmem to begin with ] [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix up drivers/gpu/drm/i915/intel_overlay.c] Acked-by: Rik van Riel Signed-off-by: Peter Zijlstra Acked-by: Chris Metcalf Cc: David Howells Cc: Hugh Dickins Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Russell King Cc: Ralf Baechle Cc: David Miller Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Dave Airlie Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 61 +++++++++++++++++++++++++++++----------------- include/linux/io-mapping.h | 14 +++++------ 2 files changed, 45 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 283cd47bb34c..8a85ec109a3a 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -28,18 +28,6 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size) #include -#ifdef CONFIG_DEBUG_HIGHMEM - -void debug_kmap_atomic(enum km_type type); - -#else - -static inline void debug_kmap_atomic(enum km_type type) -{ -} - -#endif - #ifdef CONFIG_HIGHMEM #include @@ -49,6 +37,27 @@ extern unsigned long totalhigh_pages; void kmap_flush_unused(void); +DECLARE_PER_CPU(int, __kmap_atomic_idx); + +static inline int kmap_atomic_idx_push(void) +{ + int idx = __get_cpu_var(__kmap_atomic_idx)++; +#ifdef CONFIG_DEBUG_HIGHMEM + WARN_ON_ONCE(in_irq() && !irqs_disabled()); + BUG_ON(idx > KM_TYPE_NR); +#endif + return idx; +} + +static inline int kmap_atomic_idx_pop(void) +{ + int idx = --__get_cpu_var(__kmap_atomic_idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(idx < 0); +#endif + return idx; +} + #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -66,19 +75,19 @@ static inline void kunmap(struct page *page) { } -static inline void *kmap_atomic(struct page *page, enum km_type idx) +static inline void *__kmap_atomic(struct page *page) { pagefault_disable(); return page_address(page); } -#define kmap_atomic_prot(page, idx, prot) kmap_atomic(page, idx) +#define kmap_atomic_prot(page, prot) __kmap_atomic(page) -static inline void kunmap_atomic_notypecheck(void *addr, enum km_type idx) +static inline void __kunmap_atomic(void *addr) { pagefault_enable(); } -#define kmap_atomic_pfn(pfn, idx) kmap_atomic(pfn_to_page(pfn), (idx)) +#define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) #define kmap_flush_unused() do {} while(0) @@ -86,12 +95,20 @@ static inline void kunmap_atomic_notypecheck(void *addr, enum km_type idx) #endif /* CONFIG_HIGHMEM */ -/* Prevent people trying to call kunmap_atomic() as if it were kunmap() */ -/* kunmap_atomic() should get the return value of kmap_atomic, not the page. */ -#define kunmap_atomic(addr, idx) do { \ - BUILD_BUG_ON(__same_type((addr), struct page *)); \ - kunmap_atomic_notypecheck((addr), (idx)); \ - } while (0) +/* + * Make both: kmap_atomic(page, idx) and kmap_atomic(page) work. + */ +#define kmap_atomic(page, args...) __kmap_atomic(page) + +/* + * Prevent people trying to call kunmap_atomic() as if it were kunmap() + * kunmap_atomic() should get the return value of kmap_atomic, not the page. + */ +#define kunmap_atomic(addr, args...) \ +do { \ + BUILD_BUG_ON(__same_type((addr), struct page *)); \ + __kunmap_atomic(addr); \ +} while (0) /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 7fb592793738..8cdcc2a199ad 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -81,8 +81,7 @@ io_mapping_free(struct io_mapping *mapping) /* Atomic map/unmap */ static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, - unsigned long offset, - int slot) + unsigned long offset) { resource_size_t phys_addr; unsigned long pfn; @@ -90,13 +89,13 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, BUG_ON(offset >= mapping->size); phys_addr = mapping->base + offset; pfn = (unsigned long) (phys_addr >> PAGE_SHIFT); - return iomap_atomic_prot_pfn(pfn, slot, mapping->prot); + return iomap_atomic_prot_pfn(pfn, mapping->prot); } static inline void -io_mapping_unmap_atomic(void __iomem *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr) { - iounmap_atomic(vaddr, slot); + iounmap_atomic(vaddr); } static inline void __iomem * @@ -137,14 +136,13 @@ io_mapping_free(struct io_mapping *mapping) /* Atomic map/unmap */ static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, - unsigned long offset, - int slot) + unsigned long offset) { return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap_atomic(void __iomem *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr) { } -- cgit v1.2.3 From 182fea8f48332de085c0ae936605cb72671db9f2 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Tue, 26 Oct 2010 14:21:55 -0700 Subject: mm: remove alignment padding from anon_vma on (some) 64 bit builds Reorder structure anon_vma to remove alignment padding on 64 builds when (CONFIG_KSM || CONFIG_MIGRATION). This will shrink the size of the anon_vma structure from 40 to 32 bytes & allow more objects per slab in its kmem_cache. Under slub the objects in the anon_vma kmem_cache will then be 40 bytes with 102 objects per slab. (On v2.6.36 without this patch,the size is 48 bytes and 85 objects/slab.) Signed-off-by: Richard Kennedy Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 31b2fd75dcba..5c98df68a953 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -25,8 +25,8 @@ * pointing to this anon_vma once its vma list is empty. */ struct anon_vma { - spinlock_t lock; /* Serialize access to vma list */ struct anon_vma *root; /* Root of this anon_vma tree */ + spinlock_t lock; /* Serialize access to vma list */ #if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION) /* -- cgit v1.2.3 From d065bd810b6deb67d4897a14bfe21f8eb526ba99 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 26 Oct 2010 14:21:57 -0700 Subject: mm: retry page fault when blocking on disk transfer This change reduces mmap_sem hold times that are caused by waiting for disk transfers when accessing file mapped VMAs. It introduces the VM_FAULT_ALLOW_RETRY flag, which indicates that the call site wants mmap_sem to be released if blocking on a pending disk transfer. In that case, filemap_fault() returns the VM_FAULT_RETRY status bit and do_page_fault() will then re-acquire mmap_sem and retry the page fault. It is expected that the retry will hit the same page which will now be cached, and thus it will complete with a low mmap_sem hold time. Tests: - microbenchmark: thread A mmaps a large file and does random read accesses to the mmaped area - achieves about 55 iterations/s. Thread B does mmap/munmap in a loop at a separate location - achieves 55 iterations/s before, 15000 iterations/s after. - We are seeing related effects in some applications in house, which show significant performance regressions when running without this change. [akpm@linux-foundation.org: fix warning & crash] Signed-off-by: Michel Lespinasse Acked-by: Rik van Riel Acked-by: Linus Torvalds Cc: Nick Piggin Reviewed-by: Wu Fengguang Cc: Ying Han Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Acked-by: "H. Peter Anvin" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ include/linux/pagemap.h | 13 +++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2862009f9573..3bf46655b50a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -144,6 +144,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ #define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ +#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -723,6 +724,7 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ +#define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e12cdc6d79ee..2d1ffe3cf1ee 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -299,6 +299,8 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, extern void __lock_page(struct page *page); extern int __lock_page_killable(struct page *page); extern void __lock_page_nosync(struct page *page); +extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, + unsigned int flags); extern void unlock_page(struct page *page); static inline void __set_page_locked(struct page *page) @@ -350,6 +352,17 @@ static inline void lock_page_nosync(struct page *page) __lock_page_nosync(page); } +/* + * lock_page_or_retry - Lock the page, unless this would block and the + * caller indicated that it can handle a retry. + */ +static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, + unsigned int flags) +{ + might_sleep(); + return trylock_page(page) || __lock_page_or_retry(page, mm, flags); +} + /* * This is exported only for wait_on_page_locked/wait_on_page_writeback. * Never use this directly! -- cgit v1.2.3 From 25ca1d6c02fe1c6d90d918867ef670d323725458 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 26 Oct 2010 14:21:59 -0700 Subject: mm: wrap get_locked_pte() using __cond_lock() The get_locked_pte() conditionally grabs 'ptl' in case of returning non-NULL. This leads sparse to complain about context imbalance. Rename and wrap it using __cond_lock() to make sparse happy. Signed-off-by: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3bf46655b50a..721f451c3029 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1034,7 +1034,15 @@ extern void unregister_shrinker(struct shrinker *); int vma_wants_writenotify(struct vm_area_struct *vma); -extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); +extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl); +static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl) +{ + pte_t *ptep; + __cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl)); + return ptep; +} #ifdef __PAGETABLE_PUD_FOLDED static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, -- cgit v1.2.3 From ea4525b6008fb29553306ec6719f8e6930ac9499 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 26 Oct 2010 14:22:01 -0700 Subject: rmap: annotate lock context change on page_[un]lock_anon_vma() The page_lock_anon_vma() conditionally grabs RCU and anon_vma lock but page_unlock_anon_vma() releases them unconditionally. This leads sparse to complain about context imbalance. Annotate them. Signed-off-by: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 5c98df68a953..07ea89c16761 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -230,7 +230,20 @@ int try_to_munlock(struct page *); /* * Called by memory-failure.c to kill processes. */ -struct anon_vma *page_lock_anon_vma(struct page *page); +struct anon_vma *__page_lock_anon_vma(struct page *page); + +static inline struct anon_vma *page_lock_anon_vma(struct page *page) +{ + struct anon_vma *anon_vma; + + __cond_lock(RCU, anon_vma = __page_lock_anon_vma(page)); + + /* (void) is needed to make gcc happy */ + (void) __cond_lock(&anon_vma->root->lock, anon_vma); + + return anon_vma; +} + void page_unlock_anon_vma(struct anon_vma *anon_vma); int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); -- cgit v1.2.3 From e9a81a821d7f9c5d899cc3acdeafbd884c2c48bb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 26 Oct 2010 14:22:01 -0700 Subject: rmap: wrap page_check_address() using __cond_lock() The page_check_address() conditionally grabs *@ptlp in case of returning non-NULL. Rename and wrap it using __cond_lock() removes following warnings from sparse: mm/rmap.c:472:9: warning: context imbalance in 'page_mapped_in_vma' - unexpected unlock mm/rmap.c:524:9: warning: context imbalance in 'page_referenced_one' - unexpected unlock mm/rmap.c:706:9: warning: context imbalance in 'page_mkclean_one' - unexpected unlock mm/rmap.c:1066:9: warning: context imbalance in 'try_to_unmap_one' - unexpected unlock Signed-off-by: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 07ea89c16761..bb83c0da2071 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -205,9 +205,20 @@ int try_to_unmap_one(struct page *, struct vm_area_struct *, /* * Called from mm/filemap_xip.c to unmap empty zero page */ -pte_t *page_check_address(struct page *, struct mm_struct *, +pte_t *__page_check_address(struct page *, struct mm_struct *, unsigned long, spinlock_t **, int); +static inline pte_t *page_check_address(struct page *page, struct mm_struct *mm, + unsigned long address, + spinlock_t **ptlp, int sync) +{ + pte_t *ptep; + + __cond_lock(*ptlp, ptep = __page_check_address(page, mm, address, + ptlp, sync)); + return ptep; +} + /* * Used by swapoff to help locate where page is expected in vma. */ -- cgit v1.2.3 From 92c09c041f15fc88b35f8628e07639f52e1fbb38 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 26 Oct 2010 14:22:03 -0700 Subject: mm: declare some external symbols Declare 'bdi_pending_list' and 'tag_pages_for_writeback()' to remove following sparse warnings: mm/backing-dev.c:46:1: warning: symbol 'bdi_pending_list' was not declared. Should it be static? mm/page-writeback.c:825:6: warning: symbol 'tag_pages_for_writeback' was not declared. Should it be static? Signed-off-by: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 1 + include/linux/writeback.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f1b402a50679..4ce34fa937d4 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -111,6 +111,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); extern spinlock_t bdi_lock; extern struct list_head bdi_list; +extern struct list_head bdi_pending_list; static inline int wb_has_dirty_io(struct bdi_writeback *wb) { diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..c7299d2ace6b 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -149,6 +149,8 @@ int write_cache_pages(struct address_space *mapping, int do_writepages(struct address_space *mapping, struct writeback_control *wbc); void set_page_dirty_balance(struct page *page, int page_mkwrite); void writeback_set_ratelimit(void); +void tag_pages_for_writeback(struct address_space *mapping, + pgoff_t start, pgoff_t end); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl -- cgit v1.2.3 From 16b56cf4b8a0fa9acc21bd2ad19839b917999b96 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 26 Oct 2010 14:22:04 -0700 Subject: mm: fix sparse warnings on GFP_ZONE_TABLE/BAD Introduce ___GFP_* masks in order for gfp_t to not be mixed with plain integers which causes a lot of warnings like the following: warning: restricted gfp_t degrades to integer Signed-off-by: Namhyung Kim Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 105 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 63 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 975609cb8548..e8713d55360a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -9,6 +9,32 @@ struct vm_area_struct; +/* Plain integer GFP bitmasks. Do not use this directly. */ +#define ___GFP_DMA 0x01u +#define ___GFP_HIGHMEM 0x02u +#define ___GFP_DMA32 0x04u +#define ___GFP_MOVABLE 0x08u +#define ___GFP_WAIT 0x10u +#define ___GFP_HIGH 0x20u +#define ___GFP_IO 0x40u +#define ___GFP_FS 0x80u +#define ___GFP_COLD 0x100u +#define ___GFP_NOWARN 0x200u +#define ___GFP_REPEAT 0x400u +#define ___GFP_NOFAIL 0x800u +#define ___GFP_NORETRY 0x1000u +#define ___GFP_COMP 0x4000u +#define ___GFP_ZERO 0x8000u +#define ___GFP_NOMEMALLOC 0x10000u +#define ___GFP_HARDWALL 0x20000u +#define ___GFP_THISNODE 0x40000u +#define ___GFP_RECLAIMABLE 0x80000u +#ifdef CONFIG_KMEMCHECK +#define ___GFP_NOTRACK 0x200000u +#else +#define ___GFP_NOTRACK 0 +#endif + /* * GFP bitmasks.. * @@ -18,10 +44,10 @@ struct vm_area_struct; * without the underscores and use them consistently. The definitions here may * be used in bit comparisons. */ -#define __GFP_DMA ((__force gfp_t)0x01u) -#define __GFP_HIGHMEM ((__force gfp_t)0x02u) -#define __GFP_DMA32 ((__force gfp_t)0x04u) -#define __GFP_MOVABLE ((__force gfp_t)0x08u) /* Page is movable */ +#define __GFP_DMA ((__force gfp_t)___GFP_DMA) +#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) +#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) +#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */ #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) /* * Action modifiers - doesn't change the zoning @@ -38,27 +64,22 @@ struct vm_area_struct; * __GFP_MOVABLE: Flag that this page will be movable by the page migration * mechanism or reclaimed */ -#define __GFP_WAIT ((__force gfp_t)0x10u) /* Can wait and reschedule? */ -#define __GFP_HIGH ((__force gfp_t)0x20u) /* Should access emergency pools? */ -#define __GFP_IO ((__force gfp_t)0x40u) /* Can start physical IO? */ -#define __GFP_FS ((__force gfp_t)0x80u) /* Can call down to low-level FS? */ -#define __GFP_COLD ((__force gfp_t)0x100u) /* Cache-cold page required */ -#define __GFP_NOWARN ((__force gfp_t)0x200u) /* Suppress page allocation failure warning */ -#define __GFP_REPEAT ((__force gfp_t)0x400u) /* See above */ -#define __GFP_NOFAIL ((__force gfp_t)0x800u) /* See above */ -#define __GFP_NORETRY ((__force gfp_t)0x1000u)/* See above */ -#define __GFP_COMP ((__force gfp_t)0x4000u)/* Add compound page metadata */ -#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ -#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ -#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ -#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ -#define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */ - -#ifdef CONFIG_KMEMCHECK -#define __GFP_NOTRACK ((__force gfp_t)0x200000u) /* Don't track with kmemcheck */ -#else -#define __GFP_NOTRACK ((__force gfp_t)0) -#endif +#define __GFP_WAIT ((__force gfp_t)___GFP_WAIT) /* Can wait and reschedule? */ +#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? */ +#define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */ +#define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */ +#define __GFP_COLD ((__force gfp_t)___GFP_COLD) /* Cache-cold page required */ +#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) /* Suppress page allocation failure warning */ +#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) /* See above */ +#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) /* See above */ +#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* See above */ +#define __GFP_COMP ((__force gfp_t)___GFP_COMP) /* Add compound page metadata */ +#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) /* Return zeroed page on success */ +#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves */ +#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */ +#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */ +#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ +#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ /* * This may seem redundant, but it's a way of annotating false positives vs. @@ -186,14 +207,14 @@ static inline int allocflags_to_migratetype(gfp_t gfp_flags) #endif #define GFP_ZONE_TABLE ( \ - (ZONE_NORMAL << 0 * ZONES_SHIFT) \ - | (OPT_ZONE_DMA << __GFP_DMA * ZONES_SHIFT) \ - | (OPT_ZONE_HIGHMEM << __GFP_HIGHMEM * ZONES_SHIFT) \ - | (OPT_ZONE_DMA32 << __GFP_DMA32 * ZONES_SHIFT) \ - | (ZONE_NORMAL << __GFP_MOVABLE * ZONES_SHIFT) \ - | (OPT_ZONE_DMA << (__GFP_MOVABLE | __GFP_DMA) * ZONES_SHIFT) \ - | (ZONE_MOVABLE << (__GFP_MOVABLE | __GFP_HIGHMEM) * ZONES_SHIFT)\ - | (OPT_ZONE_DMA32 << (__GFP_MOVABLE | __GFP_DMA32) * ZONES_SHIFT)\ + (ZONE_NORMAL << 0 * ZONES_SHIFT) \ + | (OPT_ZONE_DMA << ___GFP_DMA * ZONES_SHIFT) \ + | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * ZONES_SHIFT) \ + | (OPT_ZONE_DMA32 << ___GFP_DMA32 * ZONES_SHIFT) \ + | (ZONE_NORMAL << ___GFP_MOVABLE * ZONES_SHIFT) \ + | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * ZONES_SHIFT) \ + | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * ZONES_SHIFT) \ + | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * ZONES_SHIFT) \ ) /* @@ -203,20 +224,20 @@ static inline int allocflags_to_migratetype(gfp_t gfp_flags) * allowed. */ #define GFP_ZONE_BAD ( \ - 1 << (__GFP_DMA | __GFP_HIGHMEM) \ - | 1 << (__GFP_DMA | __GFP_DMA32) \ - | 1 << (__GFP_DMA32 | __GFP_HIGHMEM) \ - | 1 << (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM) \ - | 1 << (__GFP_MOVABLE | __GFP_HIGHMEM | __GFP_DMA) \ - | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA) \ - | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_HIGHMEM) \ - | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA | __GFP_HIGHMEM)\ + 1 << (___GFP_DMA | ___GFP_HIGHMEM) \ + | 1 << (___GFP_DMA | ___GFP_DMA32) \ + | 1 << (___GFP_DMA32 | ___GFP_HIGHMEM) \ + | 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM) \ + | 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA) \ + | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA) \ + | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM) \ + | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM) \ ) static inline enum zone_type gfp_zone(gfp_t flags) { enum zone_type z; - int bit = flags & GFP_ZONEMASK; + int bit = (__force int) (flags & GFP_ZONEMASK); z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) & ((1 << ZONES_SHIFT) - 1); -- cgit v1.2.3 From e1ca7788dec6773b1a2bce51b7141948f2b8bccf Mon Sep 17 00:00:00 2001 From: Dave Young Date: Tue, 26 Oct 2010 14:22:06 -0700 Subject: mm: add vzalloc() and vzalloc_node() helpers Add vzalloc() and vzalloc_node() to encapsulate the vmalloc-then-memset-zero operation. Use __GFP_ZERO to zero fill the allocated memory. Signed-off-by: Dave Young Cc: Christoph Lameter Acked-by: Greg Ungerer Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 63a4fe6d51bd..a03dcf62ca9d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -53,8 +53,10 @@ static inline void vmalloc_init(void) #endif extern void *vmalloc(unsigned long size); +extern void *vzalloc(unsigned long size); extern void *vmalloc_user(unsigned long size); extern void *vmalloc_node(unsigned long size, int node); +extern void *vzalloc_node(unsigned long size, int node); extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); extern void *vmalloc_32_user(unsigned long size); -- cgit v1.2.3 From f27c85c56b32c42bcc54a43189c1e00fdceb23ec Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Tue, 26 Oct 2010 14:22:21 -0700 Subject: kernel.h: add {min,max}3 macros Introduce two additional min/max macros to compare three operands. This will save some cycles as well as some bytes on the stack and last but not least more pleasing as macro nesting. [akpm@linux-foundation.org: fix warnings] Signed-off-by: Hagen Paul Pfeifer Cc: Joe Perches Cc: Ingo Molnar Cc: Hartley Sweeten Cc: Russell King Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Herbert Xu Cc: Roland Dreier Cc: Sean Hefty Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index edef168a0406..8e786a27cfe6 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -651,6 +651,24 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } (void) (&_max1 == &_max2); \ _max1 > _max2 ? _max1 : _max2; }) +#define min3(x, y, z) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + typeof(z) _min3 = (z); \ + (void) (&_min1 == &_min2); \ + (void) (&_min1 == &_min3); \ + _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \ + (_min2 < _min3 ? _min2 : _min3); }) + +#define max3(x, y, z) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + typeof(z) _max3 = (z); \ + (void) (&_max1 == &_max2); \ + (void) (&_max1 == &_max3); \ + _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \ + (_max2 > _max3 ? _max2 : _max3); }) + /** * min_not_zero - return the minimum that is _not_ zero, unless both are zero * @x: value1 -- cgit v1.2.3 From a55621f15bc61826969a29e111ba131a55ef45de Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 26 Oct 2010 14:22:25 -0700 Subject: include/linux/kernel.h: add __must_check to strict_strto*() The whole point to using the strict functions is to check the return value. If you don't, strict_strto*() will return you uninitialised garbage. Offenders have been observed in the wild. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 8e786a27cfe6..e9b492b33032 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -203,10 +203,10 @@ extern unsigned long simple_strtoul(const char *,char **,unsigned int); extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); -extern int strict_strtoul(const char *, unsigned int, unsigned long *); -extern int strict_strtol(const char *, unsigned int, long *); -extern int strict_strtoull(const char *, unsigned int, unsigned long long *); -extern int strict_strtoll(const char *, unsigned int, long long *); +extern int __must_check strict_strtoul(const char *, unsigned int, unsigned long *); +extern int __must_check strict_strtol(const char *, unsigned int, long *); +extern int __must_check strict_strtoull(const char *, unsigned int, unsigned long long *); +extern int __must_check strict_strtoll(const char *, unsigned int, long long *); extern int sprintf(char * buf, const char * fmt, ...) __attribute__ ((format (printf, 2, 3))); extern int vsprintf(char *buf, const char *, va_list) -- cgit v1.2.3 From b6472776816af1ed52848c93d26e3edb3b17adab Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 26 Oct 2010 14:22:26 -0700 Subject: modules: no need to align .modinfo strings gcc aligns strings as a performance consideration for those cases where strings are being used a lot. Their use is not performance critical, and hence it seems better to save some space. Signed-off-by: Jan Beulich Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/moduleparam.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 9d2f1837b3d8..112adf8bd47d 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -21,8 +21,8 @@ #define __module_cat(a,b) ___module_cat(a,b) #define __MODULE_INFO(tag, name, info) \ static const char __module_cat(name,__LINE__)[] \ - __used \ - __attribute__((section(".modinfo"),unused)) = __stringify(tag) "=" info + __used __attribute__((section(".modinfo"), unused, aligned(1))) \ + = __stringify(tag) "=" info #else /* !MODULE */ #define __MODULE_INFO(tag, name, info) #endif -- cgit v1.2.3 From ca1cab37d91cbe8a8333732540d43cabb54cfa85 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 26 Oct 2010 14:22:34 -0700 Subject: workqueues: s/ON_STACK/ONSTACK/ Silly though it is, completions and wait_queue_heads use foo_ONSTACK (COMPLETION_INITIALIZER_ONSTACK, DECLARE_COMPLETION_ONSTACK, __WAIT_QUEUE_HEAD_INIT_ONSTACK and DECLARE_WAIT_QUEUE_HEAD_ONSTACK) so I guess workqueues should do the same thing. s/INIT_WORK_ON_STACK/INIT_WORK_ONSTACK/ s/INIT_DELAYED_WORK_ON_STACK/INIT_DELAYED_WORK_ONSTACK/ Cc: Peter Zijlstra Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 070bb7a88936..0c0771f06bfa 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -190,7 +190,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } __INIT_WORK((_work), (_func), 0); \ } while (0) -#define INIT_WORK_ON_STACK(_work, _func) \ +#define INIT_WORK_ONSTACK(_work, _func) \ do { \ __INIT_WORK((_work), (_func), 1); \ } while (0) @@ -201,9 +201,9 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } init_timer(&(_work)->timer); \ } while (0) -#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ +#define INIT_DELAYED_WORK_ONSTACK(_work, _func) \ do { \ - INIT_WORK_ON_STACK(&(_work)->work, (_func)); \ + INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ init_timer_on_stack(&(_work)->timer); \ } while (0) -- cgit v1.2.3 From 190420ab34ab4c077c641893ac19f364cf3606e4 Mon Sep 17 00:00:00 2001 From: Samu Onkalo Date: Tue, 26 Oct 2010 14:22:37 -0700 Subject: drivers/misc: driver for bh1770glc / sfh7770 ALS and proximity sensor This is a driver for ROHM BH1770GLC and OSRAM SFH7770 combined ALS and proximity sensor. Interface is sysfs based. The driver uses interrupts to provide new data. The driver supports pm_runtime and regulator frameworks. See Documentation/misc-devices/bh1770glc.txt for details Signed-off-by: Samu Onkalo Acked-by: Jonathan Cameron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/bh1770glc.h | 53 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/i2c/bh1770glc.h (limited to 'include/linux') diff --git a/include/linux/i2c/bh1770glc.h b/include/linux/i2c/bh1770glc.h new file mode 100644 index 000000000000..8b5e2df36c72 --- /dev/null +++ b/include/linux/i2c/bh1770glc.h @@ -0,0 +1,53 @@ +/* + * This file is part of the ROHM BH1770GLC / OSRAM SFH7770 sensor driver. + * Chip is combined proximity and ambient light sensor. + * + * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). + * + * Contact: Samu Onkalo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __BH1770_H__ +#define __BH1770_H__ + +/** + * struct bh1770_platform_data - platform data for bh1770glc driver + * @led_def_curr: IR led driving current. + * @glass_attenuation: Attenuation factor for covering window. + * @setup_resources: Call back for interrupt line setup function + * @release_resources: Call back for interrupte line release function + * + * Example of glass attenuation: 16384 * 385 / 100 means attenuation factor + * of 3.85. i.e. light_above_sensor = light_above_cover_window / 3.85 + */ + +struct bh1770_platform_data { +#define BH1770_LED_5mA 0 +#define BH1770_LED_10mA 1 +#define BH1770_LED_20mA 2 +#define BH1770_LED_50mA 3 +#define BH1770_LED_100mA 4 +#define BH1770_LED_150mA 5 +#define BH1770_LED_200mA 6 + __u8 led_def_curr; +#define BH1770_NEUTRAL_GA 16384 /* 16384 / 16384 = 1 */ + __u32 glass_attenuation; + int (*setup_resources)(void); + int (*release_resources)(void); +}; +#endif -- cgit v1.2.3 From 92b1f84d46b24675493d95a239eea2b07e5f13f8 Mon Sep 17 00:00:00 2001 From: Samu Onkalo Date: Tue, 26 Oct 2010 14:22:38 -0700 Subject: drivers/misc: driver for APDS990X ALS and proximity sensors This is a driver for Avago APDS990X combined ALS and proximity sensor. Interface is sysfs based. The driver uses interrupts to provide new data. The driver supports pm_runtime and regulator frameworks. See Documentation/misc-devices/apds990x.txt for details Signed-off-by: Samu Onkalo Acked-by: Jonathan Cameron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/apds990x.h | 79 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 include/linux/i2c/apds990x.h (limited to 'include/linux') diff --git a/include/linux/i2c/apds990x.h b/include/linux/i2c/apds990x.h new file mode 100644 index 000000000000..d186fcc5d257 --- /dev/null +++ b/include/linux/i2c/apds990x.h @@ -0,0 +1,79 @@ +/* + * This file is part of the APDS990x sensor driver. + * Chip is combined proximity and ambient light sensor. + * + * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). + * + * Contact: Samu Onkalo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __APDS990X_H__ +#define __APDS990X_H__ + + +#define APDS_IRLED_CURR_12mA 0x3 +#define APDS_IRLED_CURR_25mA 0x2 +#define APDS_IRLED_CURR_50mA 0x1 +#define APDS_IRLED_CURR_100mA 0x0 + +/** + * struct apds990x_chip_factors - defines effect of the cover window + * @ga: Total glass attenuation + * @cf1: clear channel factor 1 for raw to lux conversion + * @irf1: IR channel factor 1 for raw to lux conversion + * @cf2: clear channel factor 2 for raw to lux conversion + * @irf2: IR channel factor 2 for raw to lux conversion + * @df: device factor for conversion formulas + * + * Structure for tuning ALS calculation to match with environment. + * Values depend on the material above the sensor and the sensor + * itself. If the GA is zero, driver will use uncovered sensor default values + * format: decimal value * APDS_PARAM_SCALE except df which is plain integer. + */ +#define APDS_PARAM_SCALE 4096 +struct apds990x_chip_factors { + int ga; + int cf1; + int irf1; + int cf2; + int irf2; + int df; +}; + +/** + * struct apds990x_platform_data - platform data for apsd990x.c driver + * @cf: chip factor data + * @pddrive: IR-led driving current + * @ppcount: number of IR pulses used for proximity estimation + * @setup_resources: interrupt line setup call back function + * @release_resources: interrupt line release call back function + * + * Proximity detection result depends heavily on correct ppcount, pdrive + * and cover window. + * + */ + +struct apds990x_platform_data { + struct apds990x_chip_factors cf; + u8 pdrive; + u8 ppcount; + int (*setup_resources)(void); + int (*release_resources)(void); +}; + +#endif -- cgit v1.2.3 From 518de9b39e854542de59bfb8b9f61c8f7ecf808b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Oct 2010 14:22:44 -0700 Subject: fs: allow for more than 2^31 files Robin Holt tried to boot a 16TB system and found af_unix was overflowing a 32bit value : We were seeing a failure which prevented boot. The kernel was incapable of creating either a named pipe or unix domain socket. This comes down to a common kernel function called unix_create1() which does: atomic_inc(&unix_nr_socks); if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) goto out; The function get_max_files() is a simple return of files_stat.max_files. files_stat.max_files is a signed integer and is computed in fs/file_table.c's files_init(). n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = n; In our case, mempages (total_ram_pages) is approx 3,758,096,384 (0xe0000000). That leaves max_files at approximately 1,503,238,553. This causes 2 * get_max_files() to integer overflow. Fix is to let /proc/sys/fs/file-nr & /proc/sys/fs/file-max use long integers, and change af_unix to use an atomic_long_t instead of atomic_t. get_max_files() is changed to return an unsigned long. get_nr_files() is changed to return a long. unix_nr_socks is changed from atomic_t to atomic_long_t, while not strictly needed to address Robin problem. Before patch (on a 64bit kernel) : # echo 2147483648 >/proc/sys/fs/file-max # cat /proc/sys/fs/file-max -18446744071562067968 After patch: # echo 2147483648 >/proc/sys/fs/file-max # cat /proc/sys/fs/file-max 2147483648 # cat /proc/sys/fs/file-nr 704 0 2147483648 Reported-by: Robin Holt Signed-off-by: Eric Dumazet Acked-by: David Miller Reviewed-by: Robin Holt Tested-by: Robin Holt Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f34ff6e5558..b2cdb6bc8287 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -34,9 +34,9 @@ /* And dynamically-tunable limits and defaults: */ struct files_stat_struct { - int nr_files; /* read only */ - int nr_free_files; /* read only */ - int max_files; /* tunable */ + unsigned long nr_files; /* read only */ + unsigned long nr_free_files; /* read only */ + unsigned long max_files; /* tunable */ }; struct inodes_stat_t { @@ -400,7 +400,7 @@ extern void __init inode_init_early(void); extern void __init files_init(unsigned long); extern struct files_stat_struct files_stat; -extern int get_max_files(void); +extern unsigned long get_max_files(void); extern int sysctl_nr_open; extern struct inodes_stat_t inodes_stat; extern int leases_enable, lease_break_time; -- cgit v1.2.3 From 766f9164193f6dda1497bbf3861060198421fb92 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2010 14:22:45 -0700 Subject: kernel: remove PF_FLUSHER PF_FLUSHER is only ever set, not tested, remove it. Signed-off-by: Peter Zijlstra Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 56154bbb8da9..393ce94e54b7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1706,7 +1706,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ #define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_FLUSHER 0x00001000 /* responsible for disk writeback */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ #define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ -- cgit v1.2.3 From f5d87d851d76a390d0fab2f77bd1d563d69ee586 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 26 Oct 2010 14:22:49 -0700 Subject: printk: declare printk_ratelimit_state in ratelimit.h Adding declaration of printk_ratelimit_state in ratelimit.h removes potential build breakage and following sparse warning: kernel/printk.c:1426:1: warning: symbol 'printk_ratelimit_state' was not declared. Should it be static? [akpm@linux-foundation.org: remove unneeded ifdef] Signed-off-by: Namhyung Kim Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ratelimit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 8f69d09a41a5..03ff67b0cdf5 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -36,6 +36,8 @@ static inline void ratelimit_state_init(struct ratelimit_state *rs, rs->begin = 0; } +extern struct ratelimit_state printk_ratelimit_state; + extern int ___ratelimit(struct ratelimit_state *rs, const char *func); #define __ratelimit(state) ___ratelimit(state, __func__) -- cgit v1.2.3 From 77006a0a828249dd69341f960043ee41e7487aa0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 26 Oct 2010 14:22:49 -0700 Subject: ratelimit: add comment warning people off printk_ratelimit() printk_ratelimit() was a bad idea - we don't want subsytem A causing ratelimiting of subsystem B's messages. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e9b492b33032..77b04ed037df 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -277,6 +277,11 @@ asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; +/* + * Please don't use printk_ratelimit(), because it shares ratelimiting state + * with all other unrelated printk_ratelimit() callsites. Instead use + * printk_ratelimited() or plain old __ratelimit(). + */ extern int __printk_ratelimit(const char *func); #define printk_ratelimit() __printk_ratelimit(__func__) extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, -- cgit v1.2.3 From 658716d19f8f155c67d4677ba68034b8e492dfbe Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 26 Oct 2010 14:23:10 -0700 Subject: div64_u64(): improve precision on 32bit platforms The current implementation of div64_u64 for 32bit systems returns an approximately correct result when the divisor exceeds 32bits. Since doing 64bit division using 32bit hardware is a long since solved problem we just use one of the existing proven methods. Additionally, add a div64_s64 function to correctly handle doing signed 64bit division. Addresses https://bugzilla.redhat.com/show_bug.cgi?id=616105 Signed-off-by: Brian Behlendorf Signed-off-by: Oleg Nesterov Cc: Ben Woodard Cc: Jeremy Fitzhardinge Cc: Mark Grondona Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 5 +++++ include/linux/math64.h | 12 ++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 77b04ed037df..450092c1e35f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -173,6 +173,11 @@ extern int _cond_resched(void); (__x < 0) ? -__x : __x; \ }) +#define abs64(x) ({ \ + s64 __x = (x); \ + (__x < 0) ? -__x : __x; \ + }) + #ifdef CONFIG_PROVE_LOCKING void might_fault(void); #else diff --git a/include/linux/math64.h b/include/linux/math64.h index c87f1528703a..23fcdfcba81b 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -35,6 +35,14 @@ static inline u64 div64_u64(u64 dividend, u64 divisor) return dividend / divisor; } +/** + * div64_s64 - signed 64bit divide with 64bit divisor + */ +static inline s64 div64_s64(s64 dividend, s64 divisor) +{ + return dividend / divisor; +} + #elif BITS_PER_LONG == 32 #ifndef div_u64_rem @@ -53,6 +61,10 @@ extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder); extern u64 div64_u64(u64 dividend, u64 divisor); #endif +#ifndef div64_s64 +extern s64 div64_s64(s64 dividend, s64 divisor); +#endif + #endif /* BITS_PER_LONG */ /** -- cgit v1.2.3