From e159489baa717dbae70f9903770a6a4990865887 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 9 Jan 2011 23:32:15 +0100 Subject: workqueue: relax lockdep annotation on flush_work() Currently, the lockdep annotation in flush_work() requires exclusive access on the workqueue the target work is queued on and triggers warning if a work is trying to flush another work on the same workqueue; however, this is no longer true as workqueues can now execute multiple works concurrently. This patch adds lock_map_acquire_read() and make process_one_work() hold read access to the workqueue while executing a work and start_flush_work() check for write access if concurrnecy level is one or the workqueue has a rescuer (as only one execution resource - the rescuer - is guaranteed to be available under memory pressure), and read access if higher. This better represents what's going on and removes spurious lockdep warnings which are triggered by fake dependency chain created through flush_work(). * Peter pointed out that flushing another work from a WQ_MEM_RECLAIM wq breaks forward progress guarantee under memory pressure. Condition check accordingly updated. Signed-off-by: Tejun Heo Reported-by: "Rafael J. Wysocki" Tested-by: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: stable@kernel.org --- include/linux/lockdep.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 71c09b26c759..9f19430c7d07 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -522,12 +522,15 @@ static inline void print_irqtrace_events(struct task_struct *curr) #ifdef CONFIG_DEBUG_LOCK_ALLOC # ifdef CONFIG_PROVE_LOCKING # define lock_map_acquire(l) lock_acquire(l, 0, 0, 0, 2, NULL, _THIS_IP_) +# define lock_map_acquire_read(l) lock_acquire(l, 0, 0, 2, 2, NULL, _THIS_IP_) # else # define lock_map_acquire(l) lock_acquire(l, 0, 0, 0, 1, NULL, _THIS_IP_) +# define lock_map_acquire_read(l) lock_acquire(l, 0, 0, 2, 1, NULL, _THIS_IP_) # endif # define lock_map_release(l) lock_release(l, 1, _THIS_IP_) #else # define lock_map_acquire(l) do { } while (0) +# define lock_map_acquire_read(l) do { } while (0) # define lock_map_release(l) do { } while (0) #endif -- cgit v1.2.3 From f00c9e44ad1a9660fe8cd3ca15b6cd9497172eab Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 Sep 2010 17:38:58 +0200 Subject: quota: Fix deadlock during path resolution As Al Viro pointed out path resolution during Q_QUOTAON calls to quotactl is prone to deadlocks. We hold s_umount semaphore for reading during the path resolution and resolution itself may need to acquire the semaphore for writing when e. g. autofs mountpoint is passed. Solve the problem by performing the resolution before we get hold of the superblock (and thus s_umount semaphore). The whole thing is complicated by the fact that some filesystems (OCFS2) ignore the path argument. So to distinguish between filesystem which want the path and which do not we introduce new .quota_on_meta callback which does not get the path. OCFS2 then uses this callback instead of old .quota_on. CC: Al Viro CC: Christoph Hellwig CC: Ted Ts'o CC: Joel Becker Signed-off-by: Jan Kara --- include/linux/quota.h | 5 ++++- include/linux/quotaops.h | 4 +--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 94c1f03b50eb..9a85412e0db6 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -322,9 +322,12 @@ struct dquot_operations { qsize_t *(*get_reserved_space) (struct inode *); }; +struct path; + /* Operations handling requests from userspace */ struct quotactl_ops { - int (*quota_on)(struct super_block *, int, int, char *); + int (*quota_on)(struct super_block *, int, int, struct path *); + int (*quota_on_meta)(struct super_block *, int, int); int (*quota_off)(struct super_block *, int); int (*quota_sync)(struct super_block *, int, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 223b14cd129c..eb354f6f26b3 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -76,11 +76,9 @@ int dquot_mark_dquot_dirty(struct dquot *dquot); int dquot_file_open(struct inode *inode, struct file *file); -int dquot_quota_on(struct super_block *sb, int type, int format_id, - char *path); int dquot_enable(struct inode *inode, int type, int format_id, unsigned int flags); -int dquot_quota_on_path(struct super_block *sb, int type, int format_id, +int dquot_quota_on(struct super_block *sb, int type, int format_id, struct path *path); int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); -- cgit v1.2.3 From 8d661f1e462d50bd83de87ee628aaf820ce3c66c Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 11 Jan 2011 16:14:24 -0800 Subject: ieee80211: correct IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK macro It is defined in include/linux/ieee80211.h. As per IEEE spec. bit6 to bit15 in block ack parameter represents buffer size. So the bitmask should be 0xFFC0. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Cc: stable@kernel.org Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 6042228954a7..294169e31364 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -959,7 +959,7 @@ struct ieee80211_ht_info { /* block-ack parameters */ #define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 #define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C -#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0 +#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFC0 #define IEEE80211_DELBA_PARAM_TID_MASK 0xF000 #define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800 -- cgit v1.2.3 From 2ce802f62ba32a7d95748ac92bf351f76affb6ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Jan 2011 12:06:35 +0100 Subject: lockdep: Move early boot local IRQ enable/disable status to init/main.c During early boot, local IRQ is disabled until IRQ subsystem is properly initialized. During this time, no one should enable local IRQ and some operations which usually are not allowed with IRQ disabled, e.g. operations which might sleep or require communications with other processors, are allowed. lockdep tracked this with early_boot_irqs_off/on() callbacks. As other subsystems need this information too, move it to init/main.c and make it generally available. While at it, toggle the boolean to early_boot_irqs_disabled instead of enabled so that it can be initialized with %false and %true indicates the exceptional condition. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Acked-by: Pekka Enberg Cc: Linus Torvalds LKML-Reference: <20110120110635.GB6036@htj.dyndns.org> Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 2 ++ include/linux/lockdep.h | 8 -------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5a9d9059520b..d07d8057e440 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -243,6 +243,8 @@ extern int test_taint(unsigned flag); extern unsigned long get_taint(void); extern int root_mountflags; +extern bool early_boot_irqs_disabled; + /* Values used for system_state */ extern enum system_states { SYSTEM_BOOTING, diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 71c09b26c759..f638fd78d106 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -436,16 +436,8 @@ do { \ #endif /* CONFIG_LOCKDEP */ #ifdef CONFIG_TRACE_IRQFLAGS -extern void early_boot_irqs_off(void); -extern void early_boot_irqs_on(void); extern void print_irqtrace_events(struct task_struct *curr); #else -static inline void early_boot_irqs_off(void) -{ -} -static inline void early_boot_irqs_on(void) -{ -} static inline void print_irqtrace_events(struct task_struct *curr) { } -- cgit v1.2.3 From ca3e021417eed30ec2b64ce88eb0acf64aa9bc29 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 20 Jan 2011 14:44:24 -0800 Subject: memcg: fix USED bit handling at uncharge in THP Now, under THP: at charge: - PageCgroupUsed bit is set to all page_cgroup on a hugepage. ....set to 512 pages. at uncharge - PageCgroupUsed bit is unset on the head page. So, some pages will remain with "Used" bit. This patch fixes that Used bit is set only to the head page. Used bits for tail pages will be set at splitting if necessary. This patch adds this lock order: compound_lock() -> page_cgroup_move_lock(). [akpm@linux-foundation.org: fix warning] Signed-off-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Cc: Balbir Singh Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6a576f989437..f512e189be5a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -146,6 +146,10 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask); u64 mem_cgroup_get_limit(struct mem_cgroup *mem); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail); +#endif + #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -335,6 +339,11 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *mem) return 0; } +static inline void mem_cgroup_split_huge_fixup(struct page *head, + struct page *tail) +{ +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From 2d6d9fd3a54a28c6f67f26eb6c74803307a1b11e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 19 Jan 2011 22:27:14 +0100 Subject: ACPI: Introduce acpi_os_ioremap() Commit ca9b600be38c ("ACPI / PM: Make suspend_nvs_save() use acpi_os_map_memory()") attempted to prevent the code in osl.c and nvs.c from using different ioremap() variants by making the latter use acpi_os_map_memory() for mapping the NVS pages. However, that also requires acpi_os_unmap_memory() to be used for unmapping them, which causes synchronize_rcu() to be executed many times in a row unnecessarily and introduces substantial delays during resume on some systems. Instead of using acpi_os_map_memory() for mapping the NVS pages in nvs.c introduce acpi_os_ioremap() calling ioremap_cache() and make the code in both osl.c and nvs.c use it. Reported-by: Jeff Chua Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 3 --- include/linux/acpi_io.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 include/linux/acpi_io.h (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index eb176bb1b15b..a2e910e01293 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -306,9 +306,6 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req); extern void acpi_early_init(void); -int acpi_os_map_generic_address(struct acpi_generic_address *addr); -void acpi_os_unmap_generic_address(struct acpi_generic_address *addr); - #else /* !CONFIG_ACPI */ #define acpi_disabled 1 diff --git a/include/linux/acpi_io.h b/include/linux/acpi_io.h new file mode 100644 index 000000000000..7180013a4a3a --- /dev/null +++ b/include/linux/acpi_io.h @@ -0,0 +1,16 @@ +#ifndef _ACPI_IO_H_ +#define _ACPI_IO_H_ + +#include +#include + +static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, + acpi_size size) +{ + return ioremap_cache(phys, size); +} + +int acpi_os_map_generic_address(struct acpi_generic_address *addr); +void acpi_os_unmap_generic_address(struct acpi_generic_address *addr); + +#endif -- cgit v1.2.3 From 1c77ff22f539ceaa64ea43d6a26d867e84602cb7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Jan 2011 19:41:35 +0100 Subject: genirq: Remove __do_IRQ All architectures are finally converted. Remove the cruft. Signed-off-by: Thomas Gleixner Cc: Richard Henderson Cc: Mike Frysinger Cc: David Howells Cc: Tony Luck Cc: Greg Ungerer Cc: Michal Simek Acked-by: David Howells Cc: Kyle McMartin Acked-by: Benjamin Herrenschmidt Cc: Chen Liqin Cc: "David S. Miller" Cc: Chris Metcalf Cc: Jeff Dike --- include/linux/irqdesc.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 6a64c6fa81af..c1a95b7b58de 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -100,13 +100,6 @@ static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) #define get_irq_desc_data(desc) ((desc)->irq_data.handler_data) #define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc) -/* - * Monolithic do_IRQ implementation. - */ -#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ -extern unsigned int __do_IRQ(unsigned int irq); -#endif - /* * Architectures call this to let the generic IRQ layer * handle an interrupt. If the descriptor is attached to an @@ -115,14 +108,7 @@ extern unsigned int __do_IRQ(unsigned int irq); */ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) { -#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ desc->handle_irq(irq, desc); -#else - if (likely(desc->handle_irq)) - desc->handle_irq(irq, desc); - else - __do_IRQ(irq); -#endif } static inline void generic_handle_irq(unsigned int irq) -- cgit v1.2.3 From 3dece370ecc7c6152b3fdd21c6de28179f6e643d Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Fri, 21 Jan 2011 08:49:56 +0100 Subject: mm: System without MMU do not need pte_mkwrite The patch "thp: export maybe_mkwrite" (commit 14fd403f2146) breaks systems without MMU. Error log: CC arch/microblaze/mm/init.o In file included from include/linux/mman.h:14, from arch/microblaze/mm/consistent.c:24: include/linux/mm.h: In function 'maybe_mkwrite': include/linux/mm.h:482: error: implicit declaration of function 'pte_mkwrite' include/linux/mm.h:482: error: incompatible types in assignment Signed-off-by: Michal Simek CC: Andrea Arcangeli Reviewed-by: Rik van Riel CC: Andrew Morton CC: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 956a35532f47..f6385fc17ad4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -470,6 +470,7 @@ static inline void set_compound_order(struct page *page, unsigned long order) page[1].lru.prev = (void *)order; } +#ifdef CONFIG_MMU /* * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when * servicing faults for write access. In the normal case, do always want @@ -482,6 +483,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) pte = pte_mkwrite(pte); return pte; } +#endif /* * Multiple processes may "see" the same page. E.g. for untouched -- cgit v1.2.3