diff options
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/bootmem.h | 4 | ||||
-rw-r--r-- | include/linux/compiler.h | 17 | ||||
-rw-r--r-- | include/linux/context_tracking.h | 4 | ||||
-rw-r--r-- | include/linux/context_tracking_state.h | 4 | ||||
-rw-r--r-- | include/linux/delayed_call.h | 34 | ||||
-rw-r--r-- | include/linux/fs.h | 18 | ||||
-rw-r--r-- | include/linux/init_task.h | 2 | ||||
-rw-r--r-- | include/linux/kernel.h | 28 | ||||
-rw-r--r-- | include/linux/kexec.h | 2 | ||||
-rw-r--r-- | include/linux/list.h | 14 | ||||
-rw-r--r-- | include/linux/list_bl.h | 2 | ||||
-rw-r--r-- | include/linux/list_nulls.h | 2 | ||||
-rw-r--r-- | include/linux/nfs_fs.h | 1 | ||||
-rw-r--r-- | include/linux/posix_acl_xattr.h | 6 | ||||
-rw-r--r-- | include/linux/rculist.h | 105 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 21 | ||||
-rw-r--r-- | include/linux/rcutiny.h | 8 | ||||
-rw-r--r-- | include/linux/rcutree.h | 4 | ||||
-rw-r--r-- | include/linux/sched.h | 20 | ||||
-rw-r--r-- | include/linux/stop_machine.h | 7 | ||||
-rw-r--r-- | include/linux/tracepoint-defs.h | 27 | ||||
-rw-r--r-- | include/linux/tracepoint.h | 20 | ||||
-rw-r--r-- | include/linux/vtime.h | 25 | ||||
-rw-r--r-- | include/linux/wait.h | 30 | ||||
-rw-r--r-- | include/linux/xattr.h | 20 |
25 files changed, 333 insertions, 92 deletions
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index f589222bfa87..35b22f94d2d2 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -19,6 +19,10 @@ extern unsigned long min_low_pfn; * highest page */ extern unsigned long max_pfn; +/* + * highest possible page + */ +extern unsigned long long max_possible_pfn; #ifndef CONFIG_NO_BOOTMEM /* diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 4dac1036594f..00b042c49ccd 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -299,6 +299,23 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __u.__val; \ }) +/** + * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering + * @cond: boolean expression to wait for + * + * Equivalent to using smp_load_acquire() on the condition variable but employs + * the control dependency of the wait to reduce the barrier on many platforms. + * + * The control dependency provides a LOAD->STORE order, the additional RMB + * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order, + * aka. ACQUIRE. + */ +#define smp_cond_acquire(cond) do { \ + while (!(cond)) \ + cpu_relax(); \ + smp_rmb(); /* ctrl + rmb := acquire */ \ +} while (0) + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 68b575afe5f5..d259274238db 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -86,7 +86,7 @@ static inline void context_tracking_init(void) { } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN static inline void guest_enter(void) { - if (vtime_accounting_enabled()) + if (vtime_accounting_cpu_enabled()) vtime_guest_enter(current); else current->flags |= PF_VCPU; @@ -100,7 +100,7 @@ static inline void guest_exit(void) if (context_tracking_is_enabled()) __context_tracking_exit(CONTEXT_GUEST); - if (vtime_accounting_enabled()) + if (vtime_accounting_cpu_enabled()) vtime_guest_exit(current); else current->flags &= ~PF_VCPU; diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index ee956c528fab..1d34fe68f48a 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -22,12 +22,12 @@ struct context_tracking { }; #ifdef CONFIG_CONTEXT_TRACKING -extern struct static_key context_tracking_enabled; +extern struct static_key_false context_tracking_enabled; DECLARE_PER_CPU(struct context_tracking, context_tracking); static inline bool context_tracking_is_enabled(void) { - return static_key_false(&context_tracking_enabled); + return static_branch_unlikely(&context_tracking_enabled); } static inline bool context_tracking_cpu_is_enabled(void) diff --git a/include/linux/delayed_call.h b/include/linux/delayed_call.h new file mode 100644 index 000000000000..f7fa76ae1a9b --- /dev/null +++ b/include/linux/delayed_call.h @@ -0,0 +1,34 @@ +#ifndef _DELAYED_CALL_H +#define _DELAYED_CALL_H + +/* + * Poor man's closures; I wish we could've done them sanely polymorphic, + * but... + */ + +struct delayed_call { + void (*fn)(void *); + void *arg; +}; + +#define DEFINE_DELAYED_CALL(name) struct delayed_call name = {NULL, NULL} + +/* I really wish we had closures with sane typechecking... */ +static inline void set_delayed_call(struct delayed_call *call, + void (*fn)(void *), void *arg) +{ + call->fn = fn; + call->arg = arg; +} + +static inline void do_delayed_call(struct delayed_call *call) +{ + if (call->fn) + call->fn(call->arg); +} + +static inline void clear_delayed_call(struct delayed_call *call) +{ + call->fn = NULL; +} +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 3aa514254161..ef3cd36689f6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -31,6 +31,7 @@ #include <linux/blk_types.h> #include <linux/workqueue.h> #include <linux/percpu-rwsem.h> +#include <linux/delayed_call.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -1633,12 +1634,11 @@ struct file_operations { struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); - const char * (*follow_link) (struct dentry *, void **); + const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); int (*readlink) (struct dentry *, char __user *,int); - void (*put_link) (struct inode *, void *); int (*create) (struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); @@ -2736,14 +2736,14 @@ extern const struct file_operations generic_ro_fops; extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); -extern const char *page_follow_link_light(struct dentry *, void **); -extern void page_put_link(struct inode *, void *); +extern const char *page_get_link(struct dentry *, struct inode *, + struct delayed_call *); +extern void page_put_link(void *); extern int __page_symlink(struct inode *inode, const char *symname, int len, int nofs); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; -extern void kfree_put_link(struct inode *, void *); -extern void free_page_put_link(struct inode *, void *); +extern void kfree_link(void *); extern int generic_readlink(struct dentry *, char __user *, int); extern void generic_fillattr(struct inode *, struct kstat *); int vfs_getattr_nosec(struct path *path, struct kstat *stat); @@ -2754,7 +2754,8 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); -const char *simple_follow_link(struct dentry *, void **); +const char *simple_get_link(struct dentry *, struct inode *, + struct delayed_call *); extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); @@ -2764,8 +2765,6 @@ extern int vfs_lstat(const char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); extern int vfs_fstatat(int , const char __user *, struct kstat *, int); -extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, - unsigned long arg); extern int __generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t start, loff_t len, @@ -3025,5 +3024,6 @@ static inline bool dir_relax(struct inode *inode) } extern bool path_noexec(const struct path *path); +extern void inode_nohighmem(struct inode *inode); #endif /* _LINUX_FS_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1c1ff7e4faa4..f2cb8d45513d 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -150,7 +150,7 @@ extern struct task_group root_task_group; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN # define INIT_VTIME(tsk) \ - .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \ + .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ .vtime_snap = 0, \ .vtime_snap_whence = VTIME_SYS, #else diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 350dfb08aee3..7311c3294e25 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -255,6 +255,7 @@ extern long (*panic_blink)(int state); __printf(1, 2) void panic(const char *fmt, ...) __noreturn __cold; +void nmi_panic_self_stop(struct pt_regs *); extern void oops_enter(void); extern void oops_exit(void); void print_oops_end_marker(void); @@ -446,6 +447,33 @@ extern int sysctl_panic_on_stackoverflow; extern bool crash_kexec_post_notifiers; /* + * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It + * holds a CPU number which is executing panic() currently. A value of + * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). + */ +extern atomic_t panic_cpu; +#define PANIC_CPU_INVALID -1 + +/* + * A variant of panic() called from NMI context. We return if we've already + * panicked on this CPU. If another CPU already panicked, loop in + * nmi_panic_self_stop() which can provide architecture dependent code such + * as saving register state for crash dump. + */ +#define nmi_panic(regs, fmt, ...) \ +do { \ + int old_cpu, cpu; \ + \ + cpu = raw_smp_processor_id(); \ + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); \ + \ + if (old_cpu == PANIC_CPU_INVALID) \ + panic(fmt, ##__VA_ARGS__); \ + else if (old_cpu != cpu) \ + nmi_panic_self_stop(regs); \ +} while (0) + +/* * Only to be used by arch init code. If the user over-wrote the default * CONFIG_PANIC_TIMEOUT, honor it. */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d140b1e9faa7..7b68d2788a56 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -237,6 +237,7 @@ extern int kexec_purgatory_get_set_symbol(struct kimage *image, unsigned int size, bool get_value); extern void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); +extern void __crash_kexec(struct pt_regs *); extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); void crash_save_cpu(struct pt_regs *regs, int cpu); @@ -332,6 +333,7 @@ int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; +static inline void __crash_kexec(struct pt_regs *regs) { } static inline void crash_kexec(struct pt_regs *regs) { } static inline int kexec_should_crash(struct task_struct *p) { return 0; } #define kexec_in_progress false diff --git a/include/linux/list.h b/include/linux/list.h index 993395a2e55c..5356f4d661a7 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -24,7 +24,7 @@ static inline void INIT_LIST_HEAD(struct list_head *list) { - list->next = list; + WRITE_ONCE(list->next, list); list->prev = list; } @@ -42,7 +42,7 @@ static inline void __list_add(struct list_head *new, next->prev = new; new->next = next; new->prev = prev; - prev->next = new; + WRITE_ONCE(prev->next, new); } #else extern void __list_add(struct list_head *new, @@ -186,7 +186,7 @@ static inline int list_is_last(const struct list_head *list, */ static inline int list_empty(const struct list_head *head) { - return head->next == head; + return READ_ONCE(head->next) == head; } /** @@ -608,7 +608,7 @@ static inline int hlist_unhashed(const struct hlist_node *h) static inline int hlist_empty(const struct hlist_head *h) { - return !h->first; + return !READ_ONCE(h->first); } static inline void __hlist_del(struct hlist_node *n) @@ -642,7 +642,7 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) n->next = first; if (first) first->pprev = &n->next; - h->first = n; + WRITE_ONCE(h->first, n); n->pprev = &h->first; } @@ -653,14 +653,14 @@ static inline void hlist_add_before(struct hlist_node *n, n->pprev = next->pprev; n->next = next; next->pprev = &n->next; - *(n->pprev) = n; + WRITE_ONCE(*(n->pprev), n); } static inline void hlist_add_behind(struct hlist_node *n, struct hlist_node *prev) { n->next = prev->next; - prev->next = n; + WRITE_ONCE(prev->next, n); n->pprev = &prev->next; if (n->next) diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 8132214e8efd..ee7229a6c06a 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -70,7 +70,7 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h, static inline int hlist_bl_empty(const struct hlist_bl_head *h) { - return !((unsigned long)h->first & ~LIST_BL_LOCKMASK); + return !((unsigned long)READ_ONCE(h->first) & ~LIST_BL_LOCKMASK); } static inline void hlist_bl_add_head(struct hlist_bl_node *n, diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 444d2b1313bd..b01fe1009084 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -57,7 +57,7 @@ static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h) static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) { - return is_a_nulls(h->first); + return is_a_nulls(READ_ONCE(h->first)); } static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c0e961474a52..37a3d2981352 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -359,6 +359,7 @@ extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); +extern int nfs_revalidate_mapping_rcu(struct inode *inode); extern int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *); diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index 6f14ee295822..e5e8ec40278d 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -9,16 +9,12 @@ #ifndef _POSIX_ACL_XATTR_H #define _POSIX_ACL_XATTR_H +#include <uapi/linux/xattr.h> #include <linux/posix_acl.h> -/* Extended attribute names */ -#define POSIX_ACL_XATTR_ACCESS "system.posix_acl_access" -#define POSIX_ACL_XATTR_DEFAULT "system.posix_acl_default" - /* Supported ACL a_version fields */ #define POSIX_ACL_XATTR_VERSION 0x0002 - /* An undefined entry e_id value */ #define ACL_UNDEFINED_ID (-1) diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 5ed540986019..14ec1652daf4 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -179,32 +179,31 @@ static inline void list_replace_rcu(struct list_head *old, } /** - * list_splice_init_rcu - splice an RCU-protected list into an existing list. + * __list_splice_init_rcu - join an RCU-protected list into an existing list. * @list: the RCU-protected list to splice - * @head: the place in the list to splice the first list into + * @prev: points to the last element of the existing list + * @next: points to the first element of the existing list * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... * - * @head can be RCU-read traversed concurrently with this function. + * The list pointed to by @prev and @next can be RCU-read traversed + * concurrently with this function. * * Note that this function blocks. * - * Important note: the caller must take whatever action is necessary to - * prevent any other updates to @head. In principle, it is possible - * to modify the list as soon as sync() begins execution. - * If this sort of thing becomes necessary, an alternative version - * based on call_rcu() could be created. But only if -really- - * needed -- there is no shortage of RCU API members. + * Important note: the caller must take whatever action is necessary to prevent + * any other updates to the existing list. In principle, it is possible to + * modify the list as soon as sync() begins execution. If this sort of thing + * becomes necessary, an alternative version based on call_rcu() could be + * created. But only if -really- needed -- there is no shortage of RCU API + * members. */ -static inline void list_splice_init_rcu(struct list_head *list, - struct list_head *head, - void (*sync)(void)) +static inline void __list_splice_init_rcu(struct list_head *list, + struct list_head *prev, + struct list_head *next, + void (*sync)(void)) { struct list_head *first = list->next; struct list_head *last = list->prev; - struct list_head *at = head->next; - - if (list_empty(list)) - return; /* * "first" and "last" tracking list, so initialize it. RCU readers @@ -231,10 +230,40 @@ static inline void list_splice_init_rcu(struct list_head *list, * this function. */ - last->next = at; - rcu_assign_pointer(list_next_rcu(head), first); - first->prev = head; - at->prev = last; + last->next = next; + rcu_assign_pointer(list_next_rcu(prev), first); + first->prev = prev; + next->prev = last; +} + +/** + * list_splice_init_rcu - splice an RCU-protected list into an existing list, + * designed for stacks. + * @list: the RCU-protected list to splice + * @head: the place in the existing list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + */ +static inline void list_splice_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + if (!list_empty(list)) + __list_splice_init_rcu(list, head, head->next, sync); +} + +/** + * list_splice_tail_init_rcu - splice an RCU-protected list into an existing + * list, designed for queues. + * @list: the RCU-protected list to splice + * @head: the place in the existing list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + */ +static inline void list_splice_tail_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + if (!list_empty(list)) + __list_splice_init_rcu(list, head->prev, head, sync); } /** @@ -305,6 +334,42 @@ static inline void list_splice_init_rcu(struct list_head *list, pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** + * list_entry_lockless - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu(), but requires some implicit RCU + * read-side guarding. One example is running within a special + * exception-time environment where preemption is disabled and where + * lockdep cannot be invoked (in which case updaters must use RCU-sched, + * as in synchronize_sched(), call_rcu_sched(), and friends). Another + * example is when items are added to the list, but never deleted. + */ +#define list_entry_lockless(ptr, type, member) \ + container_of((typeof(ptr))lockless_dereference(ptr), type, member) + +/** + * list_for_each_entry_lockless - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu(), but requires some implicit RCU + * read-side guarding. One example is running within a special + * exception-time environment where preemption is disabled and where + * lockdep cannot be invoked (in which case updaters must use RCU-sched, + * as in synchronize_sched(), call_rcu_sched(), and friends). Another + * example is when items are added to the list, but never deleted. + */ +#define list_for_each_entry_lockless(pos, head, member) \ + for (pos = list_entry_lockless((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry_lockless(pos->member.next, typeof(*pos), member)) + +/** * list_for_each_entry_continue_rcu - continue iteration over list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a0189ba67fde..14e6f47ee16f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -48,10 +48,17 @@ #include <asm/barrier.h> +#ifndef CONFIG_TINY_RCU extern int rcu_expedited; /* for sysctl */ +extern int rcu_normal; /* also for sysctl */ +#endif /* #ifndef CONFIG_TINY_RCU */ #ifdef CONFIG_TINY_RCU /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ +static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */ +{ + return true; +} static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ { return false; @@ -65,6 +72,7 @@ static inline void rcu_unexpedite_gp(void) { } #else /* #ifdef CONFIG_TINY_RCU */ +bool rcu_gp_is_normal(void); /* Internal RCU use. */ bool rcu_gp_is_expedited(void); /* Internal RCU use. */ void rcu_expedite_gp(void); void rcu_unexpedite_gp(void); @@ -321,7 +329,6 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); -void rcu_end_inkernel_boot(void); void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); @@ -329,6 +336,12 @@ struct notifier_block; int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); +#ifndef CONFIG_TINY_RCU +void rcu_end_inkernel_boot(void); +#else /* #ifndef CONFIG_TINY_RCU */ +static inline void rcu_end_inkernel_boot(void) { } +#endif /* #ifndef CONFIG_TINY_RCU */ + #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); @@ -379,9 +392,9 @@ static inline void rcu_init_nohz(void) */ #define RCU_NONIDLE(a) \ do { \ - rcu_irq_enter(); \ + rcu_irq_enter_irqson(); \ do { a; } while (0); \ - rcu_irq_exit(); \ + rcu_irq_exit_irqson(); \ } while (0) /* @@ -741,7 +754,7 @@ static inline void rcu_preempt_sleep_check(void) * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. * - * The tracing version of rcu_dereference_raw() must not call + * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). */ #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4c1aaf9cce7b..64809aea661c 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -181,6 +181,14 @@ static inline void rcu_irq_enter(void) { } +static inline void rcu_irq_exit_irqson(void) +{ +} + +static inline void rcu_irq_enter_irqson(void) +{ +} + static inline void rcu_irq_exit(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 60d15a080d7c..ad1eda9fa4da 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,7 +37,7 @@ void rcu_cpu_stall_reset(void); /* * Note a virtualization-based context switch. This is simply a * wrapper around rcu_note_context_switch(), which allows TINY_RCU - * to save a few bytes. + * to save a few bytes. The caller must have disabled interrupts. */ static inline void rcu_virt_note_context_switch(int cpu) { @@ -97,6 +97,8 @@ void rcu_idle_enter(void); void rcu_idle_exit(void); void rcu_irq_enter(void); void rcu_irq_exit(void); +void rcu_irq_enter_irqson(void); +void rcu_irq_exit_irqson(void); void exit_rcu(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index fa39434e3fdd..0c0e78102850 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -177,9 +177,9 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void update_cpu_load_nohz(void); +extern void update_cpu_load_nohz(int active); #else -static inline void update_cpu_load_nohz(void) { } +static inline void update_cpu_load_nohz(int active) { } #endif extern unsigned long get_parent_ip(unsigned long addr); @@ -1268,8 +1268,13 @@ struct sched_entity { #endif #ifdef CONFIG_SMP - /* Per entity load average tracking */ - struct sched_avg avg; + /* + * Per entity load average tracking. + * + * Put into separate cache line so it does not + * collide with read-mostly values above. + */ + struct sched_avg avg ____cacheline_aligned_in_smp; #endif }; @@ -1520,11 +1525,14 @@ struct task_struct { cputime_t gtime; struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqlock_t vtime_seqlock; + seqcount_t vtime_seqcount; unsigned long long vtime_snap; enum { - VTIME_SLEEPING = 0, + /* Task is sleeping or running in a CPU with VTIME inactive */ + VTIME_INACTIVE = 0, + /* Task runs in userspace in a CPU with VTIME active */ VTIME_USER, + /* Task runs in kernelspace in a CPU with VTIME active */ VTIME_SYS, } vtime_snap_whence; #endif diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 0e1b1540597a..3cc9632dcc2a 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -29,7 +29,7 @@ struct cpu_stop_work { int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg); int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg); -void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, +bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf); int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); @@ -65,7 +65,7 @@ static void stop_one_cpu_nowait_workfn(struct work_struct *work) preempt_enable(); } -static inline void stop_one_cpu_nowait(unsigned int cpu, +static inline bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf) { @@ -74,7 +74,10 @@ static inline void stop_one_cpu_nowait(unsigned int cpu, work_buf->fn = fn; work_buf->arg = arg; schedule_work(&work_buf->work); + return true; } + + return false; } static inline int stop_cpus(const struct cpumask *cpumask, diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h new file mode 100644 index 000000000000..e1ee97c713bf --- /dev/null +++ b/include/linux/tracepoint-defs.h @@ -0,0 +1,27 @@ +#ifndef TRACEPOINT_DEFS_H +#define TRACEPOINT_DEFS_H 1 + +/* + * File can be included directly by headers who only want to access + * tracepoint->key to guard out of line trace calls. Otherwise + * linux/tracepoint.h should be used. + */ + +#include <linux/atomic.h> +#include <linux/static_key.h> + +struct tracepoint_func { + void *func; + void *data; + int prio; +}; + +struct tracepoint { + const char *name; /* Tracepoint name */ + struct static_key key; + void (*regfunc)(void); + void (*unregfunc)(void); + struct tracepoint_func __rcu *funcs; +}; + +#endif diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 696a339c592c..78e8397a1800 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,26 +17,12 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/rcupdate.h> -#include <linux/static_key.h> +#include <linux/tracepoint-defs.h> struct module; struct tracepoint; struct notifier_block; -struct tracepoint_func { - void *func; - void *data; - int prio; -}; - -struct tracepoint { - const char *name; /* Tracepoint name */ - struct static_key key; - void (*regfunc)(void); - void (*unregfunc)(void); - struct tracepoint_func __rcu *funcs; -}; - struct trace_enum_map { const char *system; const char *enum_string; @@ -171,8 +157,8 @@ extern void syscall_unregfunc(void); TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond), \ - rcu_irq_enter(), \ - rcu_irq_exit()); \ + rcu_irq_enter_irqson(), \ + rcu_irq_exit_irqson()); \ } #else #define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) diff --git a/include/linux/vtime.h b/include/linux/vtime.h index c5165fd256f9..fa2196990f84 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -10,16 +10,27 @@ struct task_struct; /* - * vtime_accounting_enabled() definitions/declarations + * vtime_accounting_cpu_enabled() definitions/declarations */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -static inline bool vtime_accounting_enabled(void) { return true; } +static inline bool vtime_accounting_cpu_enabled(void) { return true; } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +/* + * Checks if vtime is enabled on some CPU. Cputime readers want to be careful + * in that case and compute the tickless cputime. + * For now vtime state is tied to context tracking. We might want to decouple + * those later if necessary. + */ static inline bool vtime_accounting_enabled(void) { - if (context_tracking_is_enabled()) { + return context_tracking_is_enabled(); +} + +static inline bool vtime_accounting_cpu_enabled(void) +{ + if (vtime_accounting_enabled()) { if (context_tracking_cpu_is_enabled()) return true; } @@ -29,7 +40,7 @@ static inline bool vtime_accounting_enabled(void) #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ #ifndef CONFIG_VIRT_CPU_ACCOUNTING -static inline bool vtime_accounting_enabled(void) { return false; } +static inline bool vtime_accounting_cpu_enabled(void) { return false; } #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ @@ -44,7 +55,7 @@ extern void vtime_task_switch(struct task_struct *prev); extern void vtime_common_task_switch(struct task_struct *prev); static inline void vtime_task_switch(struct task_struct *prev) { - if (vtime_accounting_enabled()) + if (vtime_accounting_cpu_enabled()) vtime_common_task_switch(prev); } #endif /* __ARCH_HAS_VTIME_TASK_SWITCH */ @@ -59,7 +70,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk); extern void vtime_common_account_irq_enter(struct task_struct *tsk); static inline void vtime_account_irq_enter(struct task_struct *tsk) { - if (vtime_accounting_enabled()) + if (vtime_accounting_cpu_enabled()) vtime_common_account_irq_enter(tsk); } #endif /* __ARCH_HAS_VTIME_ACCOUNT */ @@ -78,7 +89,7 @@ extern void vtime_gen_account_irq_exit(struct task_struct *tsk); static inline void vtime_account_irq_exit(struct task_struct *tsk) { - if (vtime_accounting_enabled()) + if (vtime_accounting_cpu_enabled()) vtime_gen_account_irq_exit(tsk); } diff --git a/include/linux/wait.h b/include/linux/wait.h index 513b36f04dfd..d2f4ec7dba7c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -102,6 +102,36 @@ init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func) q->func = func; } +/** + * waitqueue_active -- locklessly test for waiters on the queue + * @q: the waitqueue to test for waiters + * + * returns true if the wait list is not empty + * + * NOTE: this function is lockless and requires care, incorrect usage _will_ + * lead to sporadic and non-obvious failure. + * + * Use either while holding wait_queue_head_t::lock or when used for wakeups + * with an extra smp_mb() like: + * + * CPU0 - waker CPU1 - waiter + * + * for (;;) { + * @cond = true; prepare_to_wait(&wq, &wait, state); + * smp_mb(); // smp_mb() from set_current_state() + * if (waitqueue_active(wq)) if (@cond) + * wake_up(wq); break; + * schedule(); + * } + * finish_wait(&wq, &wait); + * + * Because without the explicit smp_mb() it's possible for the + * waitqueue_active() load to get hoisted over the @cond store such that we'll + * observe an empty wait list while the waiter might not observe @cond. + * + * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), + * which (when the lock is uncontended) are of roughly equal cost. + */ static inline int waitqueue_active(wait_queue_head_t *q) { return !list_empty(&q->task_list); diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 89474b9d260c..4457541de3c9 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -19,12 +19,16 @@ struct inode; struct dentry; +/* + * struct xattr_handler: When @name is set, match attributes with exactly that + * name. When @prefix is set instead, match attributes with that prefix and + * with a non-empty suffix. + */ struct xattr_handler { + const char *name; const char *prefix; int flags; /* fs private flags */ - size_t (*list)(const struct xattr_handler *, struct dentry *dentry, - char *list, size_t list_size, const char *name, - size_t name_len); + bool (*list)(struct dentry *dentry); int (*get)(const struct xattr_handler *, struct dentry *dentry, const char *name, void *buffer, size_t size); int (*set)(const struct xattr_handler *, struct dentry *dentry, @@ -53,8 +57,11 @@ int generic_setxattr(struct dentry *dentry, const char *name, const void *value, int generic_removexattr(struct dentry *dentry, const char *name); ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, size_t size, gfp_t flags); -int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name, - const char *value, size_t size, gfp_t flags); + +static inline const char *xattr_prefix(const struct xattr_handler *handler) +{ + return handler->prefix ?: handler->name; +} struct simple_xattrs { struct list_head head; @@ -95,8 +102,7 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags); -int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name); -ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer, +ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); void simple_xattr_list_add(struct simple_xattrs *xattrs, struct simple_xattr *new_xattr); |