From 5151412dd4338b273afdb107c3772528e9e67d92 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 23 Nov 2011 10:59:13 +0100 Subject: block: initialize request_queue's numa node during struct request_queue is allocated with __GFP_ZERO so its "node" field is zero before initialization. This causes an oops if node 0 is offline in the page allocator because its zonelists are not initialized. From Dave Young's dmesg: SRAT: Node 1 PXM 2 0-d0000000 SRAT: Node 1 PXM 2 100000000-330000000 SRAT: Node 0 PXM 1 330000000-630000000 Initmem setup node 1 0000000000000000-000000000affb000 ... Built 1 zonelists in Node order, mobility grouping on. ... BUG: unable to handle kernel paging request at 0000000000001c08 IP: [] __alloc_pages_nodemask+0xb5/0x870 and __alloc_pages_nodemask+0xb5 translates to a NULL pointer on zonelist->_zonerefs. The fix is to initialize q->node at the time of allocation so the correct node is passed to the slab allocator later. Since blk_init_allocated_queue_node() is no longer needed, merge it with blk_init_allocated_queue(). [rientjes@google.com: changelog, initializing q->node] Cc: stable@vger.kernel.org [2.6.37+] Reported-by: Dave Young Signed-off-by: Mike Snitzer Signed-off-by: David Rientjes Tested-by: Dave Young Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c7a6d3b5bc7b..94acd8172b5b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -805,9 +805,6 @@ extern void blk_unprep_request(struct request *); */ extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id); -extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *, - request_fn_proc *, - spinlock_t *, int node_id); extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); extern struct request_queue *blk_init_allocated_queue(struct request_queue *, request_fn_proc *, spinlock_t *); -- cgit v1.2.3 From a67ba43d30bf8c1cfdc2615439455302d2408453 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 1 Dec 2011 12:54:31 -0500 Subject: asm-generic/unistd.h: support new process_vm_{readv,write} syscalls Also prototype the "compat" functions so they can be referenced from C code. Signed-off-by: Chris Metcalf Acked-by: Arnd Bergmann --- include/linux/compat.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 154bf5683015..66ed067fb729 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -552,5 +552,14 @@ extern ssize_t compat_rw_copy_check_uvector(int type, extern void __user *compat_alloc_user_space(unsigned long len); +asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, + const struct compat_iovec __user *lvec, + unsigned long liovcnt, const struct compat_iovec __user *rvec, + unsigned long riovcnt, unsigned long flags); +asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, + const struct compat_iovec __user *lvec, + unsigned long liovcnt, const struct compat_iovec __user *rvec, + unsigned long riovcnt, unsigned long flags); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ -- cgit v1.2.3 From 02125a826459a6ad142f8d91c5b6357562f96615 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 5 Dec 2011 08:43:34 -0500 Subject: fix apparmor dereferencing potentially freed dentry, sanitize __d_path() API __d_path() API is asking for trouble and in case of apparmor d_namespace_path() getting just that. The root cause is that when __d_path() misses the root it had been told to look for, it stores the location of the most remote ancestor in *root. Without grabbing references. Sure, at the moment of call it had been pinned down by what we have in *path. And if we raced with umount -l, we could have very well stopped at vfsmount/dentry that got freed as soon as prepend_path() dropped vfsmount_lock. It is safe to compare these pointers with pre-existing (and known to be still alive) vfsmount and dentry, as long as all we are asking is "is it the same address?". Dereferencing is not safe and apparmor ended up stepping into that. d_namespace_path() really wants to examine the place where we stopped, even if it's not connected to our namespace. As the result, it looked at ->d_sb->s_magic of a dentry that might've been already freed by that point. All other callers had been careful enough to avoid that, but it's really a bad interface - it invites that kind of trouble. The fix is fairly straightforward, even though it's bigger than I'd like: * prepend_path() root argument becomes const. * __d_path() is never called with NULL/NULL root. It was a kludge to start with. Instead, we have an explicit function - d_absolute_root(). Same as __d_path(), except that it doesn't get root passed and stops where it stops. apparmor and tomoyo are using it. * __d_path() returns NULL on path outside of root. The main caller is show_mountinfo() and that's precisely what we pass root for - to skip those outside chroot jail. Those who don't want that can (and do) use d_path(). * __d_path() root argument becomes const. Everyone agrees, I hope. * apparmor does *NOT* try to use __d_path() or any of its variants when it sees that path->mnt is an internal vfsmount. In that case it's definitely not mounted anywhere and dentry_path() is exactly what we want there. Handling of sysctl()-triggered weirdness is moved to that place. * if apparmor is asked to do pathname relative to chroot jail and __d_path() tells it we it's not in that jail, the sucker just calls d_absolute_path() instead. That's the other remaining caller of __d_path(), BTW. * seq_path_root() does _NOT_ return -ENAMETOOLONG (it's stupid anyway - the normal seq_file logics will take care of growing the buffer and redoing the call of ->show() just fine). However, if it gets path not reachable from root, it returns SEQ_SKIP. The only caller adjusted (i.e. stopped ignoring the return value as it used to do). Reviewed-by: John Johansen ACKed-by: John Johansen Signed-off-by: Al Viro Cc: stable@vger.kernel.org --- include/linux/dcache.h | 3 ++- include/linux/fs.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4df926199369..ed9f74f6c519 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -339,7 +339,8 @@ extern int d_validate(struct dentry *, struct dentry *); */ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); -extern char *__d_path(const struct path *path, struct path *root, char *, int); +extern char *__d_path(const struct path *, const struct path *, char *, int); +extern char *d_absolute_path(const struct path *, char *, int); extern char *d_path(const struct path *, char *, int); extern char *d_path_with_unreachable(const struct path *, char *, int); extern char *dentry_path_raw(struct dentry *, char *, int); diff --git a/include/linux/fs.h b/include/linux/fs.h index e3130220ce3e..019dc558df1a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1942,6 +1942,7 @@ extern int fd_statfs(int, struct kstatfs *); extern int statfs_by_dentry(struct dentry *, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); +extern bool our_mnt(struct vfsmount *mnt); extern int current_umask(void); -- cgit v1.2.3 From 83aeeada7c69f35e5100b27ec354335597a7a488 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 8 Dec 2011 14:33:54 -0800 Subject: vmscan: use atomic-long for shrinker batching Use atomic-long operations instead of looping around cmpxchg(). [akpm@linux-foundation.org: massage atomic.h inclusions] Signed-off-by: Konstantin Khlebnikov Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- include/linux/mm.h | 1 + include/linux/shrinker.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 019dc558df1a..e0bc4ffb8e7f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -393,8 +393,8 @@ struct inodes_stat_t { #include #include #include -#include #include +#include #include diff --git a/include/linux/mm.h b/include/linux/mm.h index 3dc3a8c2c485..4baadd18f4ad 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index a83833a1f7a2..07ceb97d53fa 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -35,7 +35,7 @@ struct shrinker { /* These are for internal use */ struct list_head list; - long nr; /* objs pending delete */ + atomic_long_t nr_in_batch; /* objs pending delete */ }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ extern void register_shrinker(struct shrinker *); -- cgit v1.2.3 From 6de5fc9cf7de334912de4cfd2d06eb2d744d2afe Mon Sep 17 00:00:00 2001 From: Stefan Nilsson XK Date: Thu, 3 Nov 2011 09:44:12 +0100 Subject: mmc: core: Add quirk for long data read time Adds a quirk that sets the data read timeout to a fixed value instead of relying on the information in the CSD. The timeout value chosen is 300ms since that has proven enough for the problematic cards found, but could be increased if other cards require this. This patch also enables this quirk for certain Micron cards known to have this problem. Signed-off-by: Stefan Nilsson XK Signed-off-by: Ulf Hansson Acked-by: Linus Walleij Cc: Signed-off-by: Chris Ball --- include/linux/mmc/card.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 415f2db414e1..c8ef9bc54d50 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -218,6 +218,7 @@ struct mmc_card { #define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */ #define MMC_QUIRK_BLK_NO_CMD23 (1<<7) /* Avoid CMD23 for regular multiblock */ #define MMC_QUIRK_BROKEN_BYTE_MODE_512 (1<<8) /* Avoid sending 512 bytes in */ +#define MMC_QUIRK_LONG_READ_TIME (1<<9) /* Data read time > CSD says */ /* byte mode */ unsigned int poweroff_notify_state; /* eMMC4.5 notify feature */ #define MMC_NO_POWER_NOTIFICATION 0 @@ -433,6 +434,11 @@ static inline int mmc_card_broken_byte_mode_512(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BROKEN_BYTE_MODE_512; } +static inline int mmc_card_long_read_time(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_LONG_READ_TIME; +} + #define mmc_card_name(c) ((c)->cid.prod_name) #define mmc_card_id(c) (dev_name(&(c)->dev)) -- cgit v1.2.3 From 13c07b0286d340275f2d97adf085cecda37ede37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 12 Dec 2011 22:06:55 -0800 Subject: linux/log2.h: Fix rounddown_pow_of_two(1) Exactly like roundup_pow_of_two(1), the rounddown version was buggy for the case of a compile-time constant '1' argument. Probably because it originated from the same code, sharing history with the roundup version from before the bugfix (for that one, see commit 1a06a52ee1b0: "Fix roundup_pow_of_two(1)"). However, unlike the roundup version, the fix for rounddown is to just remove the broken special case entirely. It's simply not needed - the generic code 1UL << ilog2(n) does the right thing for the constant '1' argment too. The only reason roundup needed that special case was because rounding up does so by subtracting one from the argument (and then adding one to the result) causing the obvious problems with "ilog2(0)". But rounddown doesn't do any of that, since ilog2() naturally truncates (ie "rounds down") to the right rounded down value. And without the ilog2(0) case, there's no reason for the special case that had the wrong value. tl;dr: rounddown_pow_of_two(1) should be 1, not 0. Acked-by: Dmitry Torokhov Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/log2.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/log2.h b/include/linux/log2.h index 25b808631cd9..fd7ff3d91e6a 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -185,7 +185,6 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define rounddown_pow_of_two(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n == 1) ? 0 : \ (1UL << ilog2(n))) : \ __rounddown_pow_of_two(n) \ ) -- cgit v1.2.3 From 8bc1f85c02a20a59956b00b3acea12c04dce9ae8 Mon Sep 17 00:00:00 2001 From: Eugeni Dodonov Date: Wed, 23 Nov 2011 16:42:14 -0200 Subject: iommu: Export intel_iommu_enabled to signal when iommu is in use In i915 driver, we do not enable either rc6 or semaphores on SNB when dmar is enabled. The new 'intel_iommu_enabled' variable signals when the iommu code is in operation. Cc: Ted Phelps Cc: Peter Cc: Lukas Hejtmanek Cc: Andrew Lutomirski CC: Daniel Vetter Cc: Eugeni Dodonov Signed-off-by: Keith Packard --- include/linux/dma_remapping.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index ef90cbd8e173..57c9a8ae4f2d 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -31,6 +31,7 @@ extern void free_dmar_iommu(struct intel_iommu *iommu); extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; +extern int intel_iommu_enabled; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { @@ -44,6 +45,7 @@ static inline void free_dmar_iommu(struct intel_iommu *iommu) { } #define dmar_disabled (1) +#define intel_iommu_enabled (0) #endif -- cgit v1.2.3 From b1b73d095084e754562961c443aa8f6587a55f8e Mon Sep 17 00:00:00 2001 From: Kusanagi Kouichi Date: Mon, 19 Dec 2011 18:13:19 +0900 Subject: time/clocksource: Fix kernel-doc warnings Fix various KernelDoc build warnings. Signed-off-by: Kusanagi Kouichi Cc: John Stultz Link: http://lkml.kernel.org/r/20111219091320.0D5AF6FC03D@msa105.auone-net.jp Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c86c940d1de3..081147da0564 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -71,7 +71,7 @@ struct timecounter { /** * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds - * @tc: Pointer to cycle counter. + * @cc: Pointer to cycle counter. * @cycles: Cycles * * XXX - This could use some mult_lxl_ll() asm optimization. Same code @@ -114,7 +114,7 @@ extern u64 timecounter_read(struct timecounter *tc); * time base as values returned by * timecounter_read() * @tc: Pointer to time counter. - * @cycle: a value returned by tc->cc->read() + * @cycle_tstamp: a value returned by tc->cc->read() * * Cycle counts that are converted correctly as long as they * fall into the interval [-1/2 max cycle count, +1/2 max cycle count], @@ -156,11 +156,12 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) * @max_idle_ns: max idle time permitted by the clocksource (nsecs) - * @maxadj maximum adjustment value to mult (~11%) + * @maxadj: maximum adjustment value to mult (~11%) * @flags: flags describing special properties * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary * @resume: resume function for the clocksource, if necessary + * @cycle_last: most recent cycle counter value seen by ::read() */ struct clocksource { /* @@ -187,6 +188,7 @@ struct clocksource { void (*suspend)(struct clocksource *cs); void (*resume)(struct clocksource *cs); + /* private: */ #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ struct list_head wd_list; @@ -261,6 +263,9 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) /** * clocksource_cyc2ns - converts clocksource cycles to nanoseconds + * @cycles: cycles + * @mult: cycle to nanosecond multiplier + * @shift: cycle to nanosecond divisor (power of two) * * Converts cycles to nanoseconds, using the given mult and shift. * -- cgit v1.2.3