From 9b6c2d2e2ba5280649eb043cbc7e3483c77e5d69 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:35:57 -0800 Subject: lib/bitmap.c: change prototype of bitmap_copy_le Make the prototype of bitmap_copy_le the same as bitmap_copy's. All other bitmap_* functions take unsigned long* parameters; there's no reason this should be special. The only current user is the static inline uwb_mas_bm_copy_le, which already does the void* laundering, so the end users can pass their u8 or __le32 buffers without a cast. Furthermore, this allows us to simply let bitmap_copy_le be an alias for bitmap_copy on little-endian; see next patch. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5f5c00de39f0..334fe32d8f0e 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -170,7 +170,7 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); -extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); +extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); extern int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); -- cgit v1.2.3 From e8f24278329dc31b3b8223c83a5465c9df153d9d Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:00 -0800 Subject: lib/bitmap.c: elide bitmap_copy_le on little-endian On little-endian, there's no reason to have an extra, presumably less efficient, way of copying a bitmap. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 334fe32d8f0e..cffc89c23c02 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -170,7 +170,11 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); +#ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); +#else +#define bitmap_copy_le bitmap_copy +#endif extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); extern int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); -- cgit v1.2.3 From 2fbad29917c9852fa018d572cd3d43a13465d0f8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:02 -0800 Subject: lib: bitmap: change bitmap_shift_right to take unsigned parameters I've previously changed the nbits parameter of most bitmap_* functions to unsigned; now it is bitmap_shift_{left,right}'s turn. 
This alone saves some .text, but while at it I found that there were a few other things one could do. The end result of these seven patches is

$ scripts/bloat-o-meter /tmp/bitmap.o.{old,new}
add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-328 (-328)
function                old     new   delta
__bitmap_shift_right    384     226    -158
__bitmap_shift_left     306     136    -170

and, less importantly, also a smaller stack footprint

$ stack-o-meter.pl master bitmap
file          function              old  new  delta
lib/bitmap.o  __bitmap_shift_right   24    8    -16
lib/bitmap.o  __bitmap_shift_left    24    0    -24

For each pair of 0 <= shift <= nbits <= 256 I've tested the end result with a few randomly filled src buffers (including garbage beyond nbits), in each case verifying that the shift {left,right}-most bits of dst are zero and the remaining nbits-shift bits correspond to src, so I'm fairly confident I didn't screw up. That hasn't stopped me from being wrong before, though. This patch (of 7): gcc can generate slightly better code for stuff like "nbits % BITS_PER_LONG" when it knows nbits is not negative. Since negative size bitmaps or shift amounts don't make sense, change these parameters of bitmap_shift_right to unsigned. The expressions involving "lim - 1" are still ok, since if lim is 0 the loop is never executed. Also use "shift" and "nbits" consistently for the parameter names. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index cffc89c23c02..c168a807ab9a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -96,8 +96,8 @@ extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits); -extern void __bitmap_shift_right(unsigned long *dst, - const unsigned long *src, int shift, int bits); +extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits); extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, int shift, int bits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, @@ -313,13 +313,13 @@ static inline int bitmap_weight(const unsigned long *src, unsigned int nbits) return __bitmap_weight(src, nbits); } -static inline void bitmap_shift_right(unsigned long *dst, - const unsigned long *src, int n, int nbits) +static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, int nbits) { if (small_const_nbits(nbits)) - *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n; + *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; else - __bitmap_shift_right(dst, src, n, nbits); + __bitmap_shift_right(dst, src, shift, nbits); } static inline void bitmap_shift_left(unsigned long *dst, -- cgit v1.2.3 From dba94c2553da1928303c2a6c6410247c88cafc1d Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:13 -0800 Subject: lib: bitmap: change bitmap_shift_left to take unsigned parameters gcc can generate slightly better code for stuff like "nbits % BITS_PER_LONG" when it knows nbits is not negative. Since negative size bitmaps or shift amounts don't make sense, change these parameters of bitmap_shift_left to unsigned.
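To make the codegen point concrete, here is an editor's illustration (not part of the patch): with a power-of-two divisor like BITS_PER_LONG, gcc can lower an unsigned modulo to a single mask, while a signed modulo needs extra instructions to honor C's truncation-toward-zero semantics for negative operands.

	/* signed n: gcc must emit fixups in case n is negative */
	int words_signed(int n)             { return n % BITS_PER_LONG; }

	/* unsigned n: compiles to "n & (BITS_PER_LONG - 1)" */
	unsigned words_unsigned(unsigned n) { return n % BITS_PER_LONG; }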
If off >= lim (which requires shift >= nbits), k is initialized with a large positive value which, since I've let k continue to be signed, wraps to a negative value on assignment, so the loop will never run and dst will be zeroed as expected. Inside the loop, k is guaranteed to be non-negative, so the fact that it is promoted to unsigned in the various expressions it appears in is harmless. Also use "shift" and "nbits" consistently for the parameter names. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index c168a807ab9a..5e7f75a6d7d0 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -98,8 +98,8 @@ extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits); extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); -extern void __bitmap_shift_left(unsigned long *dst, - const unsigned long *src, int shift, int bits); +extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, @@ -322,13 +322,13 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *s __bitmap_shift_right(dst, src, shift, nbits); } -static inline void bitmap_shift_left(unsigned long *dst, - const unsigned long *src, int n, int nbits) +static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) - *dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits); + *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits); else - __bitmap_shift_left(dst, src, n, nbits); + __bitmap_shift_left(dst, src, shift, nbits); } static inline int bitmap_parse(const char *buf, unsigned int buflen, -- cgit v1.2.3 From a4bb1e43e22d3cade8f942fc6f95920248eb2fd0 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 13 Feb 2015 14:36:24 -0800 Subject: mm/util: add kstrdup_const kstrdup() is often used to duplicate strings where neither the source nor the destination will ever be modified. In such a case we can just reuse the source instead of duplicating it. The problem is that we must be sure that the source is non-modifiable and that its lifetime is long enough. I suspect the good candidates for such strings are strings located in the kernel .rodata section: they cannot be modified because the section is read-only, and their lifetime is equal to the kernel's lifetime. This small patchset proposes an alternative version of kstrdup - kstrdup_const - which returns the source string if it is located in .rodata and otherwise falls back to kstrdup. To verify whether the source is in .rodata, the function checks if the address is between the sentinels __start_rodata and __end_rodata. I guess it should work with all architectures. The main patch is accompanied by four patches constifying kstrdup for cases where the situation described above happens frequently. I have tested the patchset on a mobile platform (exynos4210-trats) and it saves 3272 string allocations. Since the minimal allocation is 32 or 64 bytes depending on Kconfig options, the patchset saves respectively about 100KB or 200KB of memory.
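A minimal sketch of the mechanism described above (the real implementation lives in mm/util.c and may differ in detail; is_kernel_rodata() stands in for the check against the __start_rodata/__end_rodata sentinels):

	const char *kstrdup_const(const char *s, gfp_t gfp)
	{
		if (is_kernel_rodata((unsigned long)s))
			return s;		/* reuse the read-only source */

		return kstrdup(s, gfp);		/* otherwise make a real copy */
	}

	void kfree_const(const void *x)
	{
		if (!is_kernel_rodata((unsigned long)x))
			kfree(x);		/* only free actual duplicates */
	}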
Stats from the tested platform show that the main offender is sysfs:

By caller:
  2260 __kernfs_new_node
   631 clk_register+0xc8/0x1b8
   318 clk_register+0x34/0x1b8
    51 kmem_cache_create
    12 alloc_vfsmnt

By string (with count >= 5):
   883 power
   876 subsystem
   135 parameters
   132 device
    61 iommu_group
   ...

This patch (of 5): Add an alternative version of kstrdup which returns a pointer to a constant char array. The function checks whether the input string is in a persistent, read-only memory section; if so, it returns the input string, otherwise it falls back to kstrdup. kstrdup_const is accompanied by kfree_const, which performs conditional memory deallocation of the string. Signed-off-by: Andrzej Hajda Cc: Marek Szyprowski Cc: Kyungmin Park Cc: Mike Turquette Cc: Alexander Viro Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Tejun Heo Cc: Greg KH Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index b9bc9a5d9e21..e40099e585c9 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -112,7 +112,10 @@ extern void * memchr(const void *,int,__kernel_size_t); #endif void *memchr_inv(const void *s, int c, size_t n); +extern void kfree_const(const void *x); + extern char *kstrdup(const char *s, gfp_t gfp); +extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); -- cgit v1.2.3 From dfeb0750b630b72b5d4fb2461bc7179eceb54666 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:36:31 -0800 Subject: kernfs: remove KERNFS_STATIC_NAME When a new kernfs node is created, KERNFS_STATIC_NAME is used to avoid making a separate copy of its name. It's currently only used for sysfs attributes whose filenames are required to stay accessible and unchanged. There are rare exceptions where these names are allocated and formatted dynamically, but for the vast majority of cases they're consts in the rodata section. Now that kernfs is converted to use kstrdup_const() and kfree_const(), there's little point in keeping KERNFS_STATIC_NAME around. Remove it.
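At the call sites, the kstrdup_const() conversion that makes this removal possible looks roughly like the following (an illustrative sketch, not the literal hunks from fs/kernfs/dir.c):

	-	if (!(kn->flags & KERNFS_STATIC_NAME))
	-		name = kstrdup(name, GFP_KERNEL);
	+	name = kstrdup_const(name, GFP_KERNEL);	/* no copy for .rodata */
	...
	-	if (!(kn->flags & KERNFS_STATIC_NAME))
	-		kfree(kn->name);
	+	kfree_const(kn->name);			/* frees only real copies */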
Signed-off-by: Tejun Heo Cc: Andrzej Hajda Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernfs.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index d4e01b358341..71ecdab1671b 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -43,7 +43,6 @@ enum kernfs_node_flag { KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, KERNFS_LOCKDEP = 0x0100, - KERNFS_STATIC_NAME = 0x0200, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, }; @@ -291,7 +290,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, umode_t mode, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, - bool name_is_static, struct lock_class_key *key); struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, @@ -369,8 +367,7 @@ kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns, bool name_is_static, - struct lock_class_key *key) + void *priv, const void *ns, struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * @@ -439,7 +436,7 @@ kernfs_create_file_ns(struct kernfs_node *parent, const char *name, key = (struct lock_class_key *)&ops->lockdep_key; #endif return __kernfs_create_file(parent, name, mode, size, ops, priv, ns, - false, key); + key); } static inline struct kernfs_node * -- cgit v1.2.3 From 513e3d2d11c9f05db1edc70deb18a82555cf9309 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:36:50 -0800 Subject: cpumask: always use nr_cpu_ids in formatting and parsing functions bitmap implements two variants of scnprintf functions to format a bitmap into a string, and cpumask and nodemask wrap them to provide equivalent interfaces. The scnprintf family of functions requires a string buffer as an output target, which complicates code paths that just want to print out the mask through printk for informational or debug purposes, as they have to worry about how large the buffer should be and whether it's too large to allocate on the stack. Neither cpumask nor nodemask provides a guideline on how large the target buffer should be, forcing users to come up with their own solutions - some allocate an arbitrarily sized buffer which is small enough to allocate on the stack but may be too short in corner cases, others come up with a custom upper-limit calculation considering the output format, some allocate the buffer dynamically, while one resorted to using a lock to synchronize access to a static buffer. This is an artificial problem which is being solved repeatedly for no benefit. In a lot of cases, the output area already exists and can be targeted directly, making the intermediate buffer unnecessary. This patchset teaches the printf family of functions how to format bitmaps and replaces the dedicated formatting functions with it. Pointer formatting is extended to cover bitmap formatting. It uses the field width for the number of bits instead of precision. The format used is '%*pb[l]', with the optional trailing 'l' specifying list format instead of hex masks. For more details, please see 0002.
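For example, once the printf support and the helper macros added later in this series are in place, printing a cpumask needs no intermediate buffer at all (sketch):

	printk("allowed cpus: %*pb\n", cpumask_pr_args(mask));   /* hex mask  */
	printk("allowed cpus: %*pbl\n", cpumask_pr_args(mask));  /* list form */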
This patch (of 31): Currently, the formatting and parsing functions in cpumask.h use nr_cpumask_bits like other cpumask functions; however, nr_cpumask_bits is either NR_CPUS or nr_cpu_ids depending on CONFIG_CPUMASK_OFFSTACK. This leads to inconsistent behaviors.

With CONFIG_NR_CPUS=512 and !CONFIG_CPUMASK_OFFSTACK:

# cat /sys/devices/virtual/net/lo/queues/rx-0/rps_cpus
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
# cat /proc/self/status | grep Cpus_allowed:
Cpus_allowed: f

With CONFIG_NR_CPUS=1024 and CONFIG_CPUMASK_OFFSTACK (fedora default):

# cat /sys/devices/virtual/net/lo/queues/rx-0/rps_cpus
0
# cat /proc/self/status | grep Cpus_allowed:
Cpus_allowed: f

Note that /proc/self/status always uses nr_cpu_ids regardless of config. This is because the seq cpumask formatting functions always use nr_cpu_ids. Given that the same output fields may switch between the two forms, always converging on nr_cpu_ids isn't likely to surprise userland. This patch updates the formatting and parsing functions in cpumask.h to always use nr_cpu_ids. There's no point in dealing with CPUs which aren't even possible on the machine. Signed-off-by: Tejun Heo Cc: "David S. Miller" Cc: "James E.J. Bottomley" Cc: "John W. Linville" Cc: "Paul E. McKenney" Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Chris Zankel Cc: Christoph Lameter Cc: Dmitry Torokhov Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Li Zefan Cc: Max Filippov Cc: Mike Travis Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Russell King Acked-by: Rusty Russell Cc: Steffen Klassert Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ff9044286d88..ee9acb0ce542 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -550,7 +550,7 @@ static inline void cpumask_copy(struct cpumask *dstp, static inline int cpumask_scnprintf(char *buf, int len, const struct cpumask *srcp) { - return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpumask_bits); + return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpu_ids); } /** @@ -564,7 +564,7 @@ static inline int cpumask_scnprintf(char *buf, int len, static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { - return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); + return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpu_ids); } /** @@ -579,7 +579,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), - nr_cpumask_bits); + nr_cpu_ids); } /** @@ -595,7 +595,7 @@ static inline int cpulist_scnprintf(char *buf, int len, const struct cpumask *srcp) { return bitmap_scnlistprintf(buf, len, cpumask_bits(srcp), - nr_cpumask_bits); + nr_cpu_ids); } /** @@ -610,7 +610,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) char *nl = strchr(buf, '\n'); unsigned int len = nl ?
(unsigned int)(nl - buf) : strlen(buf); - return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits); + return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpu_ids); } /** @@ -622,7 +622,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { - return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); + return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpu_ids); } /** @@ -817,7 +817,7 @@ static inline ssize_t cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) { return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask), - nr_cpumask_bits); + nr_cpu_ids); } /* -- cgit v1.2.3 From f1bbc032e45106400905ebb47550983af4690b0b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:36:57 -0800 Subject: cpumask, nodemask: implement cpumask/nodemask_pr_args() The printf family of functions can now format bitmaps using '%*pb[l]', and all cpumask and nodemask formatting will be converted to use it. To ease printing these masks with '%*pb[l]', which requires two params - the number of bits and the actual bitmap - this patch implements cpumask_pr_args() and nodemask_pr_args(), which can be used to provide arguments for '%*pb[l]'. Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: "David S. Miller" Cc: "James E.J. Bottomley" Cc: "John W. Linville" Cc: "Paul E. McKenney" Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Chris Zankel Cc: Christoph Lameter Cc: Dmitry Torokhov Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Li Zefan Cc: Max Filippov Cc: Mike Travis Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Russell King Cc: Steffen Klassert Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 8 ++++++++ include/linux/nodemask.h | 8 ++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ee9acb0ce542..a9b3d00915a0 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -22,6 +22,14 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; */ #define cpumask_bits(maskp) ((maskp)->bits) +/** + * cpumask_pr_args - printf args to output a cpumask + * @maskp: cpumask to be printed + * + * Can be used to provide arguments for '%*pb[l]' when printing a cpumask. + */ +#define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) + #if NR_CPUS == 1 #define nr_cpu_ids 1 #else diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 21cef483dc1b..10f8e556ba07 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -98,6 +98,14 @@ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; extern nodemask_t _unused_nodemask_arg_; +/** + * nodemask_pr_args - printf args to output a nodemask + * @maskp: nodemask to be printed + * + * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. + */ +#define nodemask_pr_args(maskp) MAX_NUMNODES, (maskp)->bits + /* * The inline keyword gives the compiler room to decide to inline, or * not inline a function as it sees best. However, as these functions -- cgit v1.2.3 From 46385326cc1577587ed3e7432c2425cf6d3e4308 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:38:15 -0800 Subject: bitmap, cpumask, nodemask: remove dedicated formatting functions Now that all bitmap formatting usages have been converted to '%*pb[l]', the separate formatting functions are unnecessary.
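A typical call-site conversion performed earlier in the series looks like this (an illustrative hunk, not one of the actual patches):

	-	len = cpulist_scnprintf(buf, PAGE_SIZE - 2, mask);
	+	len = scnprintf(buf, PAGE_SIZE - 2, "%*pbl", cpumask_pr_args(mask));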
The following functions are removed.

* bitmap_scn[list]printf()
* cpumask_scnprintf(), cpulist_scnprintf()
* [__]nodemask_scnprintf(), [__]nodelist_scnprintf()
* seq_bitmap[_list](), seq_cpumask[_list](), seq_nodemask[_list]()
* seq_buf_bitmask()

Signed-off-by: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 7 ------- include/linux/cpumask.h | 31 ------------------------------- include/linux/nodemask.h | 33 +++++++-------------------------- include/linux/seq_buf.h | 3 --- include/linux/seq_file.h | 25 ------------------------- 5 files changed, 7 insertions(+), 92 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5e7f75a6d7d0..dbfbf4990005 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -52,16 +52,13 @@ * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap * bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz - * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf - * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region - * bitmap_print_to_pagebuf(list, buf, mask, nbits) Print bitmap src as list/hex */ /* @@ -147,14 +144,10 @@ bitmap_find_next_zero_area(unsigned long *map, align_mask, 0); } -extern int bitmap_scnprintf(char *buf, unsigned int len, - const unsigned long *src, int nbits); extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, unsigned long *dst, int nbits); extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); -extern int bitmap_scnlistprintf(char *buf, unsigned int len, - const unsigned long *src, int nbits); extern int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index a9b3d00915a0..086549a665e2 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -546,21 +546,6 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_of(cpu) (get_cpu_mask(cpu)) -/** - * cpumask_scnprintf - print a cpumask into a string as comma-separated hex - * @buf: the buffer to sprintf into - * @len: the length of the buffer - * @srcp: the cpumask to print - * - * If len is zero, returns zero. Otherwise returns the length of the - * (nul-terminated) @buf string.
- */ -static inline int cpumask_scnprintf(char *buf, int len, - const struct cpumask *srcp) -{ - return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpu_ids); -} - /** * cpumask_parse_user - extract a cpumask from a user string * @buf: the buffer to extract from @@ -590,22 +575,6 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, nr_cpu_ids); } -/** - * cpulist_scnprintf - print a cpumask into a string as comma-separated list - * @buf: the buffer to sprintf into - * @len: the length of the buffer - * @srcp: the cpumask to print - * - * If len is zero, returns zero. Otherwise returns the length of the - * (nul-terminated) @buf string. - */ -static inline int cpulist_scnprintf(char *buf, int len, - const struct cpumask *srcp) -{ - return bitmap_scnlistprintf(buf, len, cpumask_bits(srcp), - nr_cpu_ids); -} - /** * cpumask_parse - extract a cpumask from from a string * @buf: the buffer to extract from diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 10f8e556ba07..6e85889cf9ab 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -8,14 +8,13 @@ * See detailed comments in the file linux/bitmap.h describing the * data type on which these nodemasks are based. * - * For details of nodemask_scnprintf() and nodemask_parse_user(), - * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. - * For details of nodelist_scnprintf() and nodelist_parse(), see - * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. - * For details of node_remap(), see bitmap_bitremap in lib/bitmap.c. - * For details of nodes_remap(), see bitmap_remap in lib/bitmap.c. - * For details of nodes_onto(), see bitmap_onto in lib/bitmap.c. - * For details of nodes_fold(), see bitmap_fold in lib/bitmap.c. + * For details of nodemask_parse_user(), see bitmap_parse_user() in + * lib/bitmap.c. For details of nodelist_parse(), see bitmap_parselist(), + * also in bitmap.c. For details of node_remap(), see bitmap_bitremap in + * lib/bitmap.c. For details of nodes_remap(), see bitmap_remap in + * lib/bitmap.c. For details of nodes_onto(), see bitmap_onto in + * lib/bitmap.c. For details of nodes_fold(), see bitmap_fold in + * lib/bitmap.c. 
* * The available nodemask operations are: * @@ -52,9 +51,7 @@ * NODE_MASK_NONE Initializer - no bits set * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * - * int nodemask_scnprintf(buf, len, mask) Format nodemask for printing * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask - * int nodelist_scnprintf(buf, len, mask) Format nodemask as list for printing * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) * void nodes_remap(dst, src, old, new) *dst = map(old, new)(src) @@ -312,14 +309,6 @@ static inline int __first_unset_node(const nodemask_t *maskp) #define nodes_addr(src) ((src).bits) -#define nodemask_scnprintf(buf, len, src) \ - __nodemask_scnprintf((buf), (len), &(src), MAX_NUMNODES) -static inline int __nodemask_scnprintf(char *buf, int len, - const nodemask_t *srcp, int nbits) -{ - return bitmap_scnprintf(buf, len, srcp->bits, nbits); -} - #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) static inline int __nodemask_parse_user(const char __user *buf, int len, @@ -328,14 +317,6 @@ static inline int __nodemask_parse_user(const char __user *buf, int len, return bitmap_parse_user(buf, len, dstp->bits, nbits); } -#define nodelist_scnprintf(buf, len, src) \ - __nodelist_scnprintf((buf), (len), &(src), MAX_NUMNODES) -static inline int __nodelist_scnprintf(char *buf, int len, - const nodemask_t *srcp, int nbits) -{ - return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); -} - #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 9aafe0e24c68..fb7eb9ccb1cd 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -125,9 +125,6 @@ extern int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, unsigned int len); extern int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc); -extern int seq_buf_bitmask(struct seq_buf *s, const unsigned long *maskp, - int nmaskbits); - #ifdef CONFIG_BINARY_PRINTF extern int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index cf6a9daaaf6d..afbb1fd77c77 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -126,31 +126,6 @@ int seq_path(struct seq_file *, const struct path *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); -int seq_bitmap(struct seq_file *m, const unsigned long *bits, - unsigned int nr_bits); -static inline int seq_cpumask(struct seq_file *m, const struct cpumask *mask) -{ - return seq_bitmap(m, cpumask_bits(mask), nr_cpu_ids); -} - -static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask) -{ - return seq_bitmap(m, mask->bits, MAX_NUMNODES); -} - -int seq_bitmap_list(struct seq_file *m, const unsigned long *bits, - unsigned int nr_bits); - -static inline int seq_cpumask_list(struct seq_file *m, - const struct cpumask *mask) -{ - return seq_bitmap_list(m, cpumask_bits(mask), nr_cpu_ids); -} - -static inline int seq_nodemask_list(struct seq_file *m, nodemask_t *mask) -{ - return seq_bitmap_list(m, mask->bits, MAX_NUMNODES); -} int single_open(struct file *, int (*)(struct seq_file *, void *), void *); int 
single_open_size(struct file *, int (*)(struct seq_file *, void *), void *, size_t); -- cgit v1.2.3 From cb4188ac8e5779f66b9f55888ac2c75b391cde44 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:14 -0800 Subject: compiler: introduce __alias(symbol) shortcut To be consistent with other compiler attributes, introduce the __alias(symbol) macro expanding into __attribute__((alias(#symbol))). Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 02ae99e8e6d3..cdf13ca7cac3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,6 +66,7 @@ #define __deprecated __attribute__((deprecated)) #define __packed __attribute__((packed)) #define __weak __attribute__((weak)) +#define __alias(symbol) __attribute__((alias(#symbol))) /* * it doesn't make sense on ARM (currently the only user of __naked) to trace -- cgit v1.2.3 From 0b24becc810dc3be6e3f94103a866f214c282394 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:17 -0800 Subject: kasan: add kernel address sanitizer infrastructure Kernel Address sanitizer (KASan) is a dynamic memory error detector. It provides a fast and comprehensive solution for finding use-after-free and out-of-bounds bugs. KASAN uses compile-time instrumentation for checking every memory access, therefore GCC > v4.9.2 is required. v4.9.2 almost works, but has issues with putting symbol aliases into the wrong section, which breaks kasan instrumentation of globals. This patch only adds the infrastructure for the kernel address sanitizer; it's not available for use yet. The idea and some code were borrowed from [1]. Basic idea: The main idea of KASAN is to use shadow memory to record whether each byte of memory is safe to access or not, and to use the compiler's instrumentation to check the shadow memory on each memory access. The address sanitizer uses 1/8 of the memory addressable in the kernel for shadow memory and uses a direct mapping with a scale and offset to translate a memory address to its corresponding shadow address. Here is the function translating an address to its corresponding shadow address:

	unsigned long kasan_mem_to_shadow(unsigned long addr)
	{
		return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
	}

where KASAN_SHADOW_SCALE_SHIFT = 3. So for every 8 bytes there is one corresponding byte of shadow memory. The following encoding is used for each shadow byte:

* 0 means that all 8 bytes of the corresponding memory region are valid for access;
* k (1 <= k <= 7) means that the first k bytes are valid for access, and the other (8 - k) bytes are not;
* any negative value indicates that the entire 8-byte region is inaccessible.

Different negative values are used to distinguish between different kinds of inaccessible memory (redzones, freed memory) (see mm/kasan/kasan.h). To be able to detect accesses to bad memory we need a special compiler. Such a compiler inserts specific function calls (__asan_load*(addr), __asan_store*(addr)) before each memory access of size 1, 2, 4, 8 or 16.
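Conceptually, the instrumentation rewrites every such access; a sketch of what the compiler emits for an 8-byte store (an editor's illustration, not literal GCC output):

	/* original code */
	*p = x;

	/* instrumented code */
	__asan_store8((unsigned long)p);	/* check the shadow first */
	*p = x;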
These __asan_* functions check whether the memory region is valid to access by checking the corresponding shadow memory. If an access is not valid, an error is printed. Historical background of the address sanitizer from Dmitry Vyukov: "We've developed the set of tools, AddressSanitizer (Asan), ThreadSanitizer and MemorySanitizer, for user space. We actively use them for testing inside of Google (continuous testing, fuzzing, running prod services). To date the tools have found more than 10'000 scary bugs in Chromium, Google internal codebase and various open-source projects (Firefox, OpenSSL, gcc, clang, ffmpeg, MySQL and lots of others): [2] [3] [4]. The tools are part of both gcc and clang compilers. We have not yet done massive testing under the Kernel AddressSanitizer (it's kind of a chicken-and-egg problem, you need it to be upstream to start applying it extensively). To date it has found about 50 bugs. Bugs that we've found in the upstream kernel are listed in [5]. We've also found ~20 bugs in our internal version of the kernel. Also people from Samsung and Oracle have found some. [...] As others noted, the main feature of AddressSanitizer is its performance due to inline compiler instrumentation and simple linear shadow memory. User-space Asan has ~2x slowdown on computational programs and ~2x memory consumption increase. Taking into account that the kernel usually consumes only a small fraction of CPU and memory when running real user-space programs, I would expect that kernel Asan will have ~10-30% slowdown and similar memory consumption increase (when we finish all tuning). I agree that Asan can well replace kmemcheck. We have plans to start working on Kernel MemorySanitizer that finds uses of uninitialized memory. Asan+Msan will provide feature parity with kmemcheck. As others noted, Asan is unlikely to replace debug slab and pagealloc, which can be enabled at runtime. Asan uses compiler instrumentation, so even if it is disabled, it still incurs visible overheads. Asan technology is easily portable to other architectures. Compiler instrumentation is fully portable. The runtime has some arch-dependent parts like shadow mapping and atomic operation interception. They are relatively easy to port." Comparison with other debugging features: ======================================== KMEMCHECK: - KASan can do almost everything that kmemcheck can. KASan uses compile-time instrumentation, which makes it significantly faster than kmemcheck. The only advantage of kmemcheck over KASan is detection of uninitialized memory reads. Some brief performance testing showed that kasan could be x500-x600 times faster than kmemcheck:

$ netperf -l 30
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to localhost (127.0.0.1) port 0 AF_INET

               Recv    Send    Send
               Socket  Socket  Message  Elapsed
               Size    Size    Size     Time     Throughput
               bytes   bytes   bytes    secs.    10^6bits/sec
no debug:      87380   16384   16384    30.00    41624.72
kasan inline:  87380   16384   16384    30.00    12870.54
kasan outline: 87380   16384   16384    30.00    10586.39
kmemcheck:     87380   16384   16384    30.03       20.23

- Also, kmemcheck couldn't work with several CPUs - it always sets the number of CPUs to 1. KASan doesn't have such a limitation. DEBUG_PAGEALLOC: - KASan is slower than DEBUG_PAGEALLOC, but KASan works at sub-page granularity, so it is able to find more bugs. SLUB_DEBUG (poisoning, redzones): - SLUB_DEBUG has lower overhead than KASan. - SLUB_DEBUG in most cases is not able to detect bad reads; KASan is able to detect both reads and writes. - In some cases (e.g.
a redzone is overwritten) SLUB_DEBUG detects bugs only on allocation/freeing of the object. KASan catches the bug right before it happens, so we always know the exact place of the first bad read/write. [1] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel [2] https://code.google.com/p/address-sanitizer/wiki/FoundBugs [3] https://code.google.com/p/thread-sanitizer/wiki/FoundBugs [4] https://code.google.com/p/memory-sanitizer/wiki/FoundBugs [5] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel#Trophies Based on work by Andrey Konovalov. Signed-off-by: Andrey Ryabinin Acked-by: Michal Marek Signed-off-by: Andrey Konovalov Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 3 +++ 2 files changed, 49 insertions(+) create mode 100644 include/linux/kasan.h (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h new file mode 100644 index 000000000000..9102fda60def --- /dev/null +++ b/include/linux/kasan.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_KASAN_H +#define _LINUX_KASAN_H + +#include + +struct kmem_cache; +struct page; + +#ifdef CONFIG_KASAN + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) + +#include +#include + +static inline void *kasan_mem_to_shadow(const void *addr) +{ + return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + + KASAN_SHADOW_OFFSET; +} + +/* Enable reporting bugs after kasan_disable_current() */ +static inline void kasan_enable_current(void) +{ + current->kasan_depth++; +} + +/* Disable reporting bugs for current task */ +static inline void kasan_disable_current(void) +{ + current->kasan_depth--; +} + +void kasan_unpoison_shadow(const void *address, size_t size); + +#else /* CONFIG_KASAN */ + +static inline void kasan_unpoison_shadow(const void *address, size_t size) {} + +static inline void kasan_enable_current(void) {} +static inline void kasan_disable_current(void) {} + +#endif /* CONFIG_KASAN */ + +#endif /* LINUX_KASAN_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 048b91b983ed..41c60e5302d7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1664,6 +1664,9 @@ struct task_struct { unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; +#ifdef CONFIG_KASAN + unsigned int kasan_depth; +#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack */ int curr_ret_stack; -- cgit v1.2.3 From b8c73fc2493d42517be95cf2c89659fc6c6f4d02 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:28 -0800 Subject: mm: page_alloc: add kasan hooks on alloc and free paths Add kernel address sanitizer hooks to mark allocated pages' addresses as accessible in the corresponding shadow region. Mark freed pages as inaccessible. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H.
Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9102fda60def..f00c15c41235 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -34,6 +34,9 @@ static inline void kasan_disable_current(void) void kasan_unpoison_shadow(const void *address, size_t size); +void kasan_alloc_pages(struct page *page, unsigned int order); +void kasan_free_pages(struct page *page, unsigned int order); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -41,6 +44,9 @@ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {} +static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} +static inline void kasan_free_pages(struct page *page, unsigned int order) {} + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ -- cgit v1.2.3 From 912f5fbf1d3060f25d6994aed0265c55b974b2e9 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:31 -0800 Subject: mm: slub: introduce virt_to_obj function virt_to_obj takes kmem_cache address, address of slab page, address x pointing somewhere inside slab object, and returns address of the beginning of object. Signed-off-by: Andrey Ryabinin Acked-by: Christoph Lameter Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 9abf04ed0999..db7d5de00c5f 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -110,4 +110,20 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) } #endif + +/** + * virt_to_obj - returns address of the beginning of object. + * @s: object's kmem_cache + * @slab_page: address of slab page + * @x: address within object memory range + * + * Returns address of the beginning of object + */ +static inline void *virt_to_obj(struct kmem_cache *s, + const void *slab_page, + const void *x) +{ + return (void *)x - ((x - slab_page) % s->size); +} + #endif /* _LINUX_SLUB_DEF_H */ -- cgit v1.2.3 From 75c66def8d815201aa0386ecc7c66a5c8dbca1ee Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:35 -0800 Subject: mm: slub: share object_err function Remove static and add function declarations to linux/slub_def.h so it could be used by kernel address sanitizer. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index db7d5de00c5f..33885118523c 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -126,4 +126,7 @@ static inline void *virt_to_obj(struct kmem_cache *s, return (void *)x - ((x - slab_page) % s->size); } +void object_err(struct kmem_cache *s, struct page *page, + u8 *object, char *reason); + #endif /* _LINUX_SLUB_DEF_H */ -- cgit v1.2.3 From 0316bec22ec95ea2faca6406437b0b5950553b7c Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:42 -0800 Subject: mm: slub: add kernel address sanitizer support for slub allocator With this patch kasan will be able to catch bugs in memory allocated by slub. Initially all objects in newly allocated slab page, marked as redzone. Later, when allocation of slub object happens, requested by caller number of bytes marked as accessible, and the rest of the object (including slub's metadata) marked as redzone (inaccessible). We also mark object as accessible if ksize was called for this object. There is some places in kernel where ksize function is called to inquire size of really allocated area. Such callers could validly access whole allocated memory, so it should be marked as accessible. Code in slub.c and slab_common.c files could validly access to object's metadata, so instrumentation for this files are disabled. Signed-off-by: Andrey Ryabinin Signed-off-by: Dmitry Chernenkov Cc: Dmitry Vyukov Cc: Konstantin Serebryany Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 27 +++++++++++++++++++++++++++ include/linux/slab.h | 11 +++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index f00c15c41235..d5310eef3e38 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -37,6 +37,18 @@ void kasan_unpoison_shadow(const void *address, size_t size); void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); +void kasan_poison_slab(struct page *page); +void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); +void kasan_poison_object_data(struct kmem_cache *cache, void *object); + +void kasan_kmalloc_large(const void *ptr, size_t size); +void kasan_kfree_large(const void *ptr); +void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size); +void kasan_krealloc(const void *object, size_t new_size); + +void kasan_slab_alloc(struct kmem_cache *s, void *object); +void kasan_slab_free(struct kmem_cache *s, void *object); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -47,6 +59,21 @@ static inline void kasan_disable_current(void) {} static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} +static inline void kasan_poison_slab(struct page *page) {} +static inline void kasan_unpoison_object_data(struct kmem_cache *cache, + void *object) {} +static inline void kasan_poison_object_data(struct kmem_cache *cache, + void *object) {} + +static inline void kasan_kmalloc_large(void *ptr, size_t size) {} +static inline void kasan_kfree_large(const void *ptr) {} +static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, + size_t size) {} +static inline void kasan_krealloc(const void *object, size_t new_size) {} + +static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} +static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index ed2ffaab59ea..76f1feeabd38 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -104,6 +104,7 @@ (unsigned long)ZERO_SIZE_PTR) #include +#include struct mem_cgroup; /* @@ -325,7 +326,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) { - return kmem_cache_alloc(s, flags); + void *ret = kmem_cache_alloc(s, flags); + + kasan_kmalloc(s, ret, size); + return ret; } static __always_inline void * @@ -333,7 +337,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, int node, size_t size) { - return kmem_cache_alloc_node(s, gfpflags, node); + void *ret = kmem_cache_alloc_node(s, gfpflags, node); + + kasan_kmalloc(s, ret, size); + return ret; } #endif /* CONFIG_TRACING */ -- cgit v1.2.3 From c420f167db8c799d69fe43a801c58a7f02e9d57c Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:59 -0800 Subject: kasan: enable stack instrumentation Stack instrumentation allows to detect out of bounds memory accesses for variables allocated on stack. Compiler adds redzones around every variable on stack and poisons redzones in function's prologue. 
Such an approach significantly increases stack usage, so the size of all in-kernel stacks was doubled. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d3d43ecf148c..696d22312b31 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -175,6 +175,13 @@ extern struct task_group root_task_group; # define INIT_NUMA_BALANCING(tsk) #endif +#ifdef CONFIG_KASAN +# define INIT_KASAN(tsk) \ + .kasan_depth = 1, +#else +# define INIT_KASAN(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -250,6 +257,7 @@ extern struct task_group root_task_group; INIT_RT_MUTEXES(tsk) \ INIT_VTIME(tsk) \ INIT_NUMA_BALANCING(tsk) \ + INIT_KASAN(tsk) \ } -- cgit v1.2.3 From 71394fe50146202f2c8d92cf50f5ebc761acf254 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:40:03 -0800 Subject: mm: vmalloc: add flag preventing guard hole allocation For instrumenting global variables KASan will need shadow memory for the memory backing modules. So on module loading we will need to allocate memory for the shadow and map it at the address in the shadow region that corresponds to the address allocated in module_alloc(). __vmalloc_node_range() could be used for this purpose, except that it puts a guard hole after the allocated area. A guard hole in shadow memory would be a problem because at some future point we might need to have shadow memory at the address occupied by the guard hole, so we could fail to allocate shadow for module_alloc(). Add a new vm_struct flag 'VM_NO_GUARD' indicating that the vm area doesn't have a guard hole. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H.
Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b87696fdf06a..1526fe712ca0 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -16,6 +16,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ +#define VM_NO_GUARD 0x00000040 /* don't add guard page */ /* bits [20..32] reserved for arch specific ioremap internals */ /* @@ -96,8 +97,12 @@ void vmalloc_sync_all(void); static inline size_t get_vm_area_size(const struct vm_struct *area) { - /* return actual size without guard page */ - return area->size - PAGE_SIZE; + if (!(area->flags & VM_NO_GUARD)) + /* return actual size without guard page */ + return area->size - PAGE_SIZE; + else + return area->size; + } extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); -- cgit v1.2.3 From cb9e3c292d0115499c660028ad35ac5501d722b5 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:40:07 -0800 Subject: mm: vmalloc: pass additional vm_flags to __vmalloc_node_range() For instrumenting global variables KASan will shadow memory backing memory for modules. So on module loading we will need to allocate memory for shadow and map it at address in shadow that corresponds to the address allocated in module_alloc(). __vmalloc_node_range() could be used for this purpose, except it puts a guard hole after allocated area. Guard hole in shadow memory should be a problem because at some future point we might need to have a shadow memory at address occupied by guard hole. So we could fail to allocate shadow for module_alloc(). Now we have VM_NO_GUARD flag disabling guard page, so we need to pass into __vmalloc_node_range(). Add new parameter 'vm_flags' to __vmalloc_node_range() function. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 1526fe712ca0..7d7acb35603d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -76,7 +76,9 @@ extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, int node, const void *caller); + pgprot_t prot, unsigned long vm_flags, int node, + const void *caller); + extern void vfree(const void *addr); extern void *vmap(struct page **pages, unsigned int count, -- cgit v1.2.3 From 6301939d97d079f0d3dbe71e750f4daf5d39fc33 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:40:13 -0800 Subject: module: fix types of device tables aliases MODULE_DEVICE_TABLE() macro used to create aliases to device tables. Normally alias should have the same type as aliased symbol. Device tables are arrays, so they have 'struct type##_device_id[x]' types. Alias created by MODULE_DEVICE_TABLE() will have non-array type - 'struct type##_device_id'. This inconsistency confuses compiler, it could make a wrong assumption about variable's size which leads KASan to produce a false positive report about out of bounds access. For every global variable compiler calls __asan_register_globals() passing information about global variable (address, size, size with redzone, name ...) __asan_register_globals() poison symbols redzone to detect possible out of bounds accesses. When symbol has an alias __asan_register_globals() will be called as for symbol so for alias. Compiler determines size of variable by size of variable's type. Alias and symbol have the same address, so if alias have the wrong size part of memory that actually belongs to the symbol could be poisoned as redzone of alias symbol. By fixing type of alias symbol we will fix size of it, so __asan_register_globals() will not poison valid memory. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index b653d7c0a05a..42999fe2dbd0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -135,7 +135,7 @@ void trim_init_extable(struct module *m); #ifdef MODULE /* Creates an alias so file2alias.c can find device table. 
*/ #define MODULE_DEVICE_TABLE(type, name) \ - extern const struct type##_device_id __mod_##type##__##name##_device_table \ +extern const typeof(name) __mod_##type##__##name##_device_table \ __attribute__ ((unused, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) -- cgit v1.2.3 From bebf56a1b176c2e1c9efe44e7e6915532cc682cf Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:40:17 -0800 Subject: kasan: enable instrumentation of global variables This feature lets us detect out-of-bounds accesses to global variables. It works both for globals in the kernel image and for globals in modules. Currently it won't work for symbols in user-specified sections (e.g. __init, __read_mostly, ...) The idea of this is simple. The compiler increases each global variable by the redzone size and adds constructors invoking the __asan_register_globals() function. Information about the global variable (address, size, size with redzone ...) is passed to __asan_register_globals() so we can poison the variable's redzone. This patch also forces module_alloc() to return an 8*PAGE_SIZE aligned address, making shadow memory handling (kasan_module_alloc()/kasan_module_free()) simpler. Such alignment guarantees that each shadow page backing the modules' address space corresponds to only one module_alloc() allocation. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc4.h | 4 ++++ include/linux/compiler-gcc5.h | 2 ++ include/linux/kasan.h | 10 ++++++++++ 3 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index d1a558239b1a..769e19864632 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -85,3 +85,7 @@ #define __HAVE_BUILTIN_BSWAP16__ #endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + +#if GCC_VERSION >= 40902 +#define KASAN_ABI_VERSION 3 +#endif diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h index c8c565952548..efee493714eb 100644 --- a/include/linux/compiler-gcc5.h +++ b/include/linux/compiler-gcc5.h @@ -63,3 +63,5 @@ #define __HAVE_BUILTIN_BSWAP64__ #define __HAVE_BUILTIN_BSWAP16__ #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + +#define KASAN_ABI_VERSION 4 diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d5310eef3e38..72ba725ddf9c 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -49,8 +49,15 @@ void kasan_krealloc(const void *object, size_t new_size); void kasan_slab_alloc(struct kmem_cache *s, void *object); void kasan_slab_free(struct kmem_cache *s, void *object); +#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) + +int kasan_module_alloc(void *addr, size_t size); +void kasan_module_free(void *addr); + #else /* CONFIG_KASAN */ +#define MODULE_ALIGN 1 + static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_enable_current(void) {} @@ -74,6 +81,9 @@ static inline void kasan_krealloc(const void *object, size_t new_size) {} static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} static inline void kasan_slab_free(struct kmem_cache
*s, void *object) {} +static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline void kasan_module_free(void *addr) {} + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ -- cgit v1.2.3
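With the MODULE_ALIGN and VM_NO_GUARD pieces above in place, an architecture's module_alloc() can tie the series together roughly like this (a hypothetical sketch of arch code; real implementations differ):

	void *module_alloc(unsigned long size)
	{
		/* 8*PAGE_SIZE alignment so each shadow page maps to one allocation */
		return __vmalloc_node_range(size, MODULE_ALIGN,
					    MODULES_VADDR, MODULES_END,
					    GFP_KERNEL, PAGE_KERNEL_EXEC,
					    0, NUMA_NO_NODE,
					    __builtin_return_address(0));
	}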