From dfd402a4c4baae42398ce9180ff424d589b8bffc Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:02:54 +0100 Subject: kcsan: Add Kernel Concurrency Sanitizer infrastructure Kernel Concurrency Sanitizer (KCSAN) is a dynamic data-race detector for kernel space. KCSAN is a sampling watchpoint-based data-race detector. See the included Documentation/dev-tools/kcsan.rst for more details. This patch adds basic infrastructure, but does not yet enable KCSAN for any architecture. Signed-off-by: Marco Elver Acked-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/compiler-clang.h | 9 ++++ include/linux/compiler-gcc.h | 7 +++ include/linux/compiler.h | 37 ++++++++++--- include/linux/kcsan-checks.h | 97 ++++++++++++++++++++++++++++++++++ include/linux/kcsan.h | 115 +++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 4 ++ 6 files changed, 261 insertions(+), 8 deletions(-) create mode 100644 include/linux/kcsan-checks.h create mode 100644 include/linux/kcsan.h (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 333a6695a918..a213eb55e725 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -24,6 +24,15 @@ #define __no_sanitize_address #endif +#if __has_feature(thread_sanitizer) +/* emulate gcc's __SANITIZE_THREAD__ flag */ +#define __SANITIZE_THREAD__ +#define __no_sanitize_thread \ + __attribute__((no_sanitize("thread"))) +#else +#define __no_sanitize_thread +#endif + /* * Not all versions of clang implement the the type-generic versions * of the builtin overflow checkers. Fortunately, clang implements diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index d7ee4c6bad48..0eb2a1cc411d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -145,6 +145,13 @@ #define __no_sanitize_address #endif +#if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__) +#define __no_sanitize_thread \ + __attribute__((__noinline__)) __attribute__((no_sanitize_thread)) +#else +#define __no_sanitize_thread +#endif + #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 5e88e7e33abe..c42fa83f23fb 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -178,6 +178,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif #include +#include #define __READ_ONCE_SIZE \ ({ \ @@ -193,12 +194,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, } \ }) -static __always_inline -void __read_once_size(const volatile void *p, void *res, int size) -{ - __READ_ONCE_SIZE; -} - #ifdef CONFIG_KASAN /* * We can't declare function 'inline' because __no_sanitize_address confilcts @@ -207,18 +202,44 @@ void __read_once_size(const volatile void *p, void *res, int size) * '__maybe_unused' allows us to avoid defined-but-not-used warnings. */ # define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused +# define __no_sanitize_or_inline __no_kasan_or_inline #else # define __no_kasan_or_inline __always_inline #endif -static __no_kasan_or_inline +#ifdef __SANITIZE_THREAD__ +/* + * Rely on __SANITIZE_THREAD__ instead of CONFIG_KCSAN, to avoid not inlining in + * compilation units where instrumentation is disabled. + */ +# define __no_kcsan_or_inline __no_sanitize_thread notrace __maybe_unused +# define __no_sanitize_or_inline __no_kcsan_or_inline +#else +# define __no_kcsan_or_inline __always_inline +#endif + +#ifndef __no_sanitize_or_inline +#define __no_sanitize_or_inline __always_inline +#endif + +static __no_kcsan_or_inline +void __read_once_size(const volatile void *p, void *res, int size) +{ + kcsan_check_atomic_read(p, size); + __READ_ONCE_SIZE; +} + +static __no_sanitize_or_inline void __read_once_size_nocheck(const volatile void *p, void *res, int size) { __READ_ONCE_SIZE; } -static __always_inline void __write_once_size(volatile void *p, void *res, int size) +static __no_kcsan_or_inline +void __write_once_size(volatile void *p, void *res, int size) { + kcsan_check_atomic_write(p, size); + switch (size) { case 1: *(volatile __u8 *)p = *(__u8 *)res; break; case 2: *(volatile __u16 *)p = *(__u16 *)res; break; diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h new file mode 100644 index 000000000000..e78220661086 --- /dev/null +++ b/include/linux/kcsan-checks.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_KCSAN_CHECKS_H +#define _LINUX_KCSAN_CHECKS_H + +#include + +/* + * Access type modifiers. + */ +#define KCSAN_ACCESS_WRITE 0x1 +#define KCSAN_ACCESS_ATOMIC 0x2 + +/* + * __kcsan_*: Always calls into runtime when KCSAN is enabled. This may be used + * even in compilation units that selectively disable KCSAN, but must use KCSAN + * to validate access to an address. Never use these in header files! + */ +#ifdef CONFIG_KCSAN +/** + * __kcsan_check_access - check generic access for data race + * + * @ptr address of access + * @size size of access + * @type access type modifier + */ +void __kcsan_check_access(const volatile void *ptr, size_t size, int type); + +#else +static inline void __kcsan_check_access(const volatile void *ptr, size_t size, + int type) { } +#endif + +/* + * kcsan_*: Only calls into runtime when the particular compilation unit has + * KCSAN instrumentation enabled. May be used in header files. + */ +#ifdef __SANITIZE_THREAD__ +#define kcsan_check_access __kcsan_check_access +#else +static inline void kcsan_check_access(const volatile void *ptr, size_t size, + int type) { } +#endif + +/** + * __kcsan_check_read - check regular read access for data races + * + * @ptr address of access + * @size size of access + */ +#define __kcsan_check_read(ptr, size) __kcsan_check_access(ptr, size, 0) + +/** + * __kcsan_check_write - check regular write access for data races + * + * @ptr address of access + * @size size of access + */ +#define __kcsan_check_write(ptr, size) \ + __kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) + +/** + * kcsan_check_read - check regular read access for data races + * + * @ptr address of access + * @size size of access + */ +#define kcsan_check_read(ptr, size) kcsan_check_access(ptr, size, 0) + +/** + * kcsan_check_write - check regular write access for data races + * + * @ptr address of access + * @size size of access + */ +#define kcsan_check_write(ptr, size) \ + kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) + +/* + * Check for atomic accesses: if atomic access are not ignored, this simply + * aliases to kcsan_check_access, otherwise becomes a no-op. + */ +#ifdef CONFIG_KCSAN_IGNORE_ATOMICS +#define kcsan_check_atomic_read(...) \ + do { \ + } while (0) +#define kcsan_check_atomic_write(...) \ + do { \ + } while (0) +#else +#define kcsan_check_atomic_read(ptr, size) \ + kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC) +#define kcsan_check_atomic_write(ptr, size) \ + kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC | KCSAN_ACCESS_WRITE) +#endif + +#endif /* _LINUX_KCSAN_CHECKS_H */ diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h new file mode 100644 index 000000000000..9047048fee84 --- /dev/null +++ b/include/linux/kcsan.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_KCSAN_H +#define _LINUX_KCSAN_H + +#include +#include + +#ifdef CONFIG_KCSAN + +/* + * Context for each thread of execution: for tasks, this is stored in + * task_struct, and interrupts access internal per-CPU storage. + */ +struct kcsan_ctx { + int disable_count; /* disable counter */ + int atomic_next; /* number of following atomic ops */ + + /* + * We distinguish between: (a) nestable atomic regions that may contain + * other nestable regions; and (b) flat atomic regions that do not keep + * track of nesting. Both (a) and (b) are entirely independent of each + * other, and a flat region may be started in a nestable region or + * vice-versa. + * + * This is required because, for example, in the annotations for + * seqlocks, we declare seqlock writer critical sections as (a) nestable + * atomic regions, but reader critical sections as (b) flat atomic + * regions, but have encountered cases where seqlock reader critical + * sections are contained within writer critical sections (the opposite + * may be possible, too). + * + * To support these cases, we independently track the depth of nesting + * for (a), and whether the leaf level is flat for (b). + */ + int atomic_nest_count; + bool in_flat_atomic; +}; + +/** + * kcsan_init - initialize KCSAN runtime + */ +void kcsan_init(void); + +/** + * kcsan_disable_current - disable KCSAN for the current context + * + * Supports nesting. + */ +void kcsan_disable_current(void); + +/** + * kcsan_enable_current - re-enable KCSAN for the current context + * + * Supports nesting. + */ +void kcsan_enable_current(void); + +/** + * kcsan_nestable_atomic_begin - begin nestable atomic region + * + * Accesses within the atomic region may appear to race with other accesses but + * should be considered atomic. + */ +void kcsan_nestable_atomic_begin(void); + +/** + * kcsan_nestable_atomic_end - end nestable atomic region + */ +void kcsan_nestable_atomic_end(void); + +/** + * kcsan_flat_atomic_begin - begin flat atomic region + * + * Accesses within the atomic region may appear to race with other accesses but + * should be considered atomic. + */ +void kcsan_flat_atomic_begin(void); + +/** + * kcsan_flat_atomic_end - end flat atomic region + */ +void kcsan_flat_atomic_end(void); + +/** + * kcsan_atomic_next - consider following accesses as atomic + * + * Force treating the next n memory accesses for the current context as atomic + * operations. + * + * @n number of following memory accesses to treat as atomic. + */ +void kcsan_atomic_next(int n); + +#else /* CONFIG_KCSAN */ + +static inline void kcsan_init(void) { } + +static inline void kcsan_disable_current(void) { } + +static inline void kcsan_enable_current(void) { } + +static inline void kcsan_nestable_atomic_begin(void) { } + +static inline void kcsan_nestable_atomic_end(void) { } + +static inline void kcsan_flat_atomic_begin(void) { } + +static inline void kcsan_flat_atomic_end(void) { } + +static inline void kcsan_atomic_next(int n) { } + +#endif /* CONFIG_KCSAN */ + +#endif /* _LINUX_KCSAN_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 67a1d86981a9..ae4f341c1db4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -31,6 +31,7 @@ #include #include #include +#include /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -1172,6 +1173,9 @@ struct task_struct { #ifdef CONFIG_KASAN unsigned int kasan_depth; #endif +#ifdef CONFIG_KCSAN + struct kcsan_ctx kcsan_ctx; +#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack: */ -- cgit v1.2.3 From c48981eeb0d56e107691df590007d6699441a689 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:02:55 +0100 Subject: include/linux/compiler.h: Introduce data_race(expr) macro This introduces the data_race(expr) macro, which can be used to annotate expressions for purposes of (1) documenting, and (2) giving tooling such as KCSAN information about which data races are deemed "safe". More context: http://lkml.kernel.org/r/CAHk-=wg5CkOEF8DTez1Qu0XTEFw_oHhxN98bDnFqbY7HL5AB2g@mail.gmail.com Signed-off-by: Marco Elver Cc: Alan Stern Cc: Eric Dumazet Cc: Linus Torvalds Cc: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/compiler.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c42fa83f23fb..7d3e77781578 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -310,6 +310,26 @@ unsigned long read_word_at_a_time(const void *addr) __u.__val; \ }) +#include + +/* + * data_race: macro to document that accesses in an expression may conflict with + * other concurrent accesses resulting in data races, but the resulting + * behaviour is deemed safe regardless. + * + * This macro *does not* affect normal code generation, but is a hint to tooling + * that data races here should be ignored. + */ +#define data_race(expr) \ + ({ \ + typeof(({ expr; })) __val; \ + kcsan_nestable_atomic_begin(); \ + __val = ({ expr; }); \ + kcsan_nestable_atomic_end(); \ + __val; \ + }) +#else + #endif /* __KERNEL__ */ /* -- cgit v1.2.3 From 88ecd153be9519f259b87a9f6f4c8383a8b3bbf1 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:02:59 +0100 Subject: seqlock, kcsan: Add annotations for KCSAN Since seqlocks in the Linux kernel do not require the use of marked atomic accesses in critical sections, we teach KCSAN to assume such accesses are atomic. KCSAN currently also pretends that writes to `sequence` are atomic, although currently plain writes are used (their corresponding reads are READ_ONCE). Further, to avoid false positives in the absence of clear ending of a seqlock reader critical section (only when using the raw interface), KCSAN assumes a fixed number of accesses after start of a seqlock critical section are atomic. === Commentary on design around absence of clear begin/end markings === Seqlock usage via seqlock_t follows a predictable usage pattern, where clear critical section begin/end is enforced. With subtle special cases for readers needing to be flat atomic regions, e.g. because usage such as in: - fs/namespace.c:__legitimize_mnt - unbalanced read_seqretry - fs/dcache.c:d_walk - unbalanced need_seqretry But, anything directly accessing seqcount_t seems to be unpredictable. Filtering for usage of read_seqcount_retry not following 'do { .. } while (read_seqcount_retry(..));': $ git grep 'read_seqcount_retry' | grep -Ev 'while \(|seqlock.h|Doc|\* ' => about 1/3 of the total read_seqcount_retry usage. Just looking at fs/namei.c, we conclude that it is non-trivial to prescribe and migrate to an interface that would force clear begin/end seqlock markings for critical sections. As such, we concluded that the best design currently, is to simply ensure that KCSAN works well with the existing code. Signed-off-by: Marco Elver Acked-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/seqlock.h | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index bcf4cf26b8c8..61232bc223fd 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -37,8 +37,24 @@ #include #include #include +#include #include +/* + * The seqlock interface does not prescribe a precise sequence of read + * begin/retry/end. For readers, typically there is a call to + * read_seqcount_begin() and read_seqcount_retry(), however, there are more + * esoteric cases which do not follow this pattern. + * + * As a consequence, we take the following best-effort approach for raw usage + * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, + * pessimistically mark then next KCSAN_SEQLOCK_REGION_MAX memory accesses as + * atomics; if there is a matching read_seqcount_retry() call, no following + * memory operations are considered atomic. Usage of seqlocks via seqlock_t + * interface is not affected. + */ +#define KCSAN_SEQLOCK_REGION_MAX 1000 + /* * Version using sequence counter only. * This can be used when code has its own mutex protecting the @@ -115,6 +131,7 @@ repeat: cpu_relax(); goto repeat; } + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); return ret; } @@ -131,6 +148,7 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s) { unsigned ret = READ_ONCE(s->sequence); smp_rmb(); + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); return ret; } @@ -183,6 +201,7 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) { unsigned ret = READ_ONCE(s->sequence); smp_rmb(); + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); return ret & ~1; } @@ -202,7 +221,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) */ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) { - return unlikely(s->sequence != start); + kcsan_atomic_next(0); + return unlikely(READ_ONCE(s->sequence) != start); } /** @@ -225,6 +245,7 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) static inline void raw_write_seqcount_begin(seqcount_t *s) { + kcsan_nestable_atomic_begin(); s->sequence++; smp_wmb(); } @@ -233,6 +254,7 @@ static inline void raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); s->sequence++; + kcsan_nestable_atomic_end(); } /** @@ -271,9 +293,11 @@ static inline void raw_write_seqcount_end(seqcount_t *s) */ static inline void raw_write_seqcount_barrier(seqcount_t *s) { + kcsan_nestable_atomic_begin(); s->sequence++; smp_wmb(); s->sequence++; + kcsan_nestable_atomic_end(); } static inline int raw_read_seqcount_latch(seqcount_t *s) @@ -398,7 +422,9 @@ static inline void write_seqcount_end(seqcount_t *s) static inline void write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); + kcsan_nestable_atomic_begin(); s->sequence+=2; + kcsan_nestable_atomic_end(); } typedef struct { @@ -430,11 +456,21 @@ typedef struct { */ static inline unsigned read_seqbegin(const seqlock_t *sl) { - return read_seqcount_begin(&sl->seqcount); + unsigned ret = read_seqcount_begin(&sl->seqcount); + + kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry */ + kcsan_flat_atomic_begin(); + return ret; } static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { + /* + * Assume not nested: read_seqretry may be called multiple times when + * completing read critical section. + */ + kcsan_flat_atomic_end(); + return read_seqcount_retry(&sl->seqcount, start); } -- cgit v1.2.3 From bf07132f96d426bcbf2098227fb680915cf44498 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:03:00 +0100 Subject: seqlock: Require WRITE_ONCE surrounding raw_seqcount_barrier This patch proposes to require marked atomic accesses surrounding raw_write_seqcount_barrier. We reason that otherwise there is no way to guarantee propagation nor atomicity of writes before/after the barrier [1]. For example, consider the compiler tears stores either before or after the barrier; in this case, readers may observe a partial value, and because readers are unaware that writes are going on (writes are not in a seq-writer critical section), will complete the seq-reader critical section while having observed some partial state. [1] https://lwn.net/Articles/793253/ This came up when designing and implementing KCSAN, because KCSAN would flag these accesses as data-races. After careful analysis, our reasoning as above led us to conclude that the best thing to do is to propose an amendment to the raw_seqcount_barrier usage. Signed-off-by: Marco Elver Acked-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/seqlock.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 61232bc223fd..f52c91be8939 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -265,6 +265,13 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * usual consistency guarantee. It is one wmb cheaper, because we can * collapse the two back-to-back wmb()s. * + * Note that, writes surrounding the barrier should be declared atomic (e.g. + * via WRITE_ONCE): a) to ensure the writes become visible to other threads + * atomically, avoiding compiler optimizations; b) to document which writes are + * meant to propagate to the reader critical section. This is necessary because + * neither writes before and after the barrier are enclosed in a seq-writer + * critical section that would ensure readers are aware of ongoing writes. + * * seqcount_t seq; * bool X = true, Y = false; * @@ -284,11 +291,11 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * * void write(void) * { - * Y = true; + * WRITE_ONCE(Y, true); * * raw_write_seqcount_barrier(seq); * - * X = false; + * WRITE_ONCE(X, false); * } */ static inline void raw_write_seqcount_barrier(seqcount_t *s) -- cgit v1.2.3