From e72246748ff006ab928bc774e276e6ef5542f9c5 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Tue, 21 Jan 2014 15:36:00 -0800 Subject: locking/mutexes/mcs: Restructure the MCS lock defines and locking code into its own file We will need the MCS lock code for doing optimistic spinning for rwsem and queued rwlock. Extracting the MCS code from mutex.c and put into its own file allow us to reuse this code easily. We also inline mcs_spin_lock and mcs_spin_unlock functions for better efficiency. Note that using the smp_load_acquire/smp_store_release pair used in mcs_lock and mcs_unlock is not sufficient to form a full memory barrier across cpus for many architectures (except x86). For applications that absolutely need a full barrier across multiple cpus with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be used after mcs_lock. Reviewed-by: Paul E. McKenney Signed-off-by: Tim Chen Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1390347360.3138.63.camel@schen9-DESK Signed-off-by: Ingo Molnar --- include/linux/mcs_spinlock.h | 77 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mutex.h | 5 +-- 2 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 include/linux/mcs_spinlock.h (limited to 'include/linux') diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h new file mode 100644 index 000000000000..9578ef81940b --- /dev/null +++ b/include/linux/mcs_spinlock.h @@ -0,0 +1,77 @@ +/* + * MCS lock defines + * + * This file contains the main data structure and API definitions of MCS lock. + * + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock + * with the desirable properties of being fair, and with each cpu trying + * to acquire the lock spinning on a local variable. + * It avoids expensive cache bouncings that common test-and-set spin-lock + * implementations incur. + */ +#ifndef __LINUX_MCS_SPINLOCK_H +#define __LINUX_MCS_SPINLOCK_H + +struct mcs_spinlock { + struct mcs_spinlock *next; + int locked; /* 1 if lock acquired */ +}; + +/* + * Note: the smp_load_acquire/smp_store_release pair is not + * sufficient to form a full memory barrier across + * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. + * For applications that need a full barrier across multiple cpus + * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be + * used after mcs_lock. + */ +static inline +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *prev; + + /* Init node */ + node->locked = 0; + node->next = NULL; + + prev = xchg(lock, node); + if (likely(prev == NULL)) { + /* Lock acquired */ + node->locked = 1; + return; + } + ACCESS_ONCE(prev->next) = node; + /* + * Wait until the lock holder passes the lock down. + * Using smp_load_acquire() provides a memory barrier that + * ensures subsequent operations happen after the lock is acquired. + */ + while (!(smp_load_acquire(&node->locked))) + arch_mutex_cpu_relax(); +} + +static inline +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *next = ACCESS_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (cmpxchg(lock, node, NULL) == node) + return; + /* Wait until the next pointer is set */ + while (!(next = ACCESS_ONCE(node->next))) + arch_mutex_cpu_relax(); + } + /* + * Pass lock to next waiter. + * smp_store_release() provides a memory barrier to ensure + * all operations in the critical section has been completed + * before unlocking. + */ + smp_store_release(&next->locked, 1); +} + +#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index d3181936c138..c482e1d2cc49 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -46,6 +46,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ +struct mcs_spinlock; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -55,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - void *spin_mlock; /* Spinner MCS lock */ + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); # define arch_mutex_cpu_relax() cpu_relax() #endif -#endif +#endif /* __LINUX_MUTEX_H */ -- cgit v1.2.3 From 5faeb8adb956a5ad6579c4e309e8689943ad8294 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Tue, 21 Jan 2014 15:36:05 -0800 Subject: locking/mcs: Micro-optimize the MCS code, add extra comments Remove unnecessary operation to assign locked status to 1 if lock is acquired without contention. Lock status will not be checked by lock holder again once it is acquired and any lock contenders will not be looking at the lock holder's lock status. Make the cmpxchg(lock, node, NULL) == node check in mcs_spin_unlock() likely() as it is likely that a race did not occur most of the time. Also add in more comments describing how the local node is used in MCS locks. Reviewed-by: Paul E. McKenney Reviewed-by: Tim Chen Signed-off-by: Jason Low Signed-off-by: Tim Chen Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1390347365.3138.64.camel@schen9-DESK Signed-off-by: Ingo Molnar --- include/linux/mcs_spinlock.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index 9578ef81940b..143fa428a857 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -25,6 +25,17 @@ struct mcs_spinlock { * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be * used after mcs_lock. */ + +/* + * In order to acquire the lock, the caller should declare a local node and + * pass a reference of the node to this function in addition to the lock. + * If the lock has already been acquired, then this will proceed to spin + * on this node->locked until the previous lock holder sets the node->locked + * in mcs_spin_unlock(). + * + * We don't inline mcs_spin_lock() so that perf can correctly account for the + * time spent in this lock function. + */ static inline void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { @@ -36,8 +47,14 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) prev = xchg(lock, node); if (likely(prev == NULL)) { - /* Lock acquired */ - node->locked = 1; + /* + * Lock acquired, don't need to set node->locked to 1. Threads + * only spin on its own node->locked value for lock acquisition. + * However, since this thread can immediately acquire the lock + * and does not proceed to spin on its own node->locked, this + * value won't be used. If a debug mode is needed to + * audit lock status, then set node->locked value here. + */ return; } ACCESS_ONCE(prev->next) = node; @@ -50,6 +67,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) arch_mutex_cpu_relax(); } +/* + * Releases the lock. The caller should pass in the corresponding node that + * was used to acquire the lock. + */ static inline void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { @@ -59,7 +80,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) /* * Release the lock by setting it to NULL */ - if (cmpxchg(lock, node, NULL) == node) + if (likely(cmpxchg(lock, node, NULL) == node)) return; /* Wait until the next pointer is set */ while (!(next = ACCESS_ONCE(node->next))) -- cgit v1.2.3 From e207552e64ea053a33e856828ad7915484911d06 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Jan 2014 15:36:10 -0800 Subject: locking/mcs: Allow architectures to hook in to contended paths When contended, architectures may be able to reduce the polling overhead in ways which aren't expressible using a simple relax() primitive. This patch allows architectures to hook into the mcs_{lock,unlock} functions for the contended cases only. Reviewed-by: Paul E. McKenney Signed-off-by: Will Deacon Signed-off-by: Tim Chen Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1390347370.3138.65.camel@schen9-DESK Signed-off-by: Ingo Molnar --- include/linux/mcs_spinlock.h | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index 143fa428a857..e9a4d74c63dc 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -17,6 +17,28 @@ struct mcs_spinlock { int locked; /* 1 if lock acquired */ }; +#ifndef arch_mcs_spin_lock_contended +/* + * Using smp_load_acquire() provides a memory barrier that ensures + * subsequent operations happen after the lock is acquired. + */ +#define arch_mcs_spin_lock_contended(l) \ +do { \ + while (!(smp_load_acquire(l))) \ + arch_mutex_cpu_relax(); \ +} while (0) +#endif + +#ifndef arch_mcs_spin_unlock_contended +/* + * smp_store_release() provides a memory barrier to ensure all + * operations in the critical section has been completed before + * unlocking. + */ +#define arch_mcs_spin_unlock_contended(l) \ + smp_store_release((l), 1) +#endif + /* * Note: the smp_load_acquire/smp_store_release pair is not * sufficient to form a full memory barrier across @@ -58,13 +80,9 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) return; } ACCESS_ONCE(prev->next) = node; - /* - * Wait until the lock holder passes the lock down. - * Using smp_load_acquire() provides a memory barrier that - * ensures subsequent operations happen after the lock is acquired. - */ - while (!(smp_load_acquire(&node->locked))) - arch_mutex_cpu_relax(); + + /* Wait until the lock holder passes the lock down. */ + arch_mcs_spin_lock_contended(&node->locked); } /* @@ -86,13 +104,9 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) while (!(next = ACCESS_ONCE(node->next))) arch_mutex_cpu_relax(); } - /* - * Pass lock to next waiter. - * smp_store_release() provides a memory barrier to ensure - * all operations in the critical section has been completed - * before unlocking. - */ - smp_store_release(&next->locked, 1); + + /* Pass lock to next waiter. */ + arch_mcs_spin_unlock_contended(&next->locked); } #endif /* __LINUX_MCS_SPINLOCK_H */ -- cgit v1.2.3 From 52bf84aa206cd2c2516dfa3e03b578edf8a3242f Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:40 -0500 Subject: sched/numa, mm: Remove p->numa_migrate_deferred Excessive migration of pages can hurt the performance of workloads that span multiple NUMA nodes. However, it turns out that the p->numa_migrate_deferred knob is a really big hammer, which does reduce migration rates, but does not actually help performance. Now that the second stage of the automatic numa balancing code has stabilized, it is time to replace the simplistic migration deferral code with something smarter. Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-2-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ffccdad050b5..d572d5ba650f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1457,7 +1457,6 @@ struct task_struct { unsigned int numa_scan_period; unsigned int numa_scan_period_max; int numa_preferred_nid; - int numa_migrate_deferred; unsigned long numa_migrate_retry; u64 node_stamp; /* migration stamp */ struct callback_head numa_work; -- cgit v1.2.3 From ff1df896aef8e0ec1556a5c44f424bd45bfa2cbe Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:41 -0500 Subject: sched/numa: Rename p->numa_faults to numa_faults_memory In order to get a more consistent naming scheme, making it clear which fault statistics track memory locality, and which track CPU locality, rename the memory fault statistics. Suggested-by: Mel Gorman Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-3-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d572d5ba650f..144d509df053 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1469,15 +1469,15 @@ struct task_struct { * Scheduling placement decisions are made based on the these counts. * The values remain static for the duration of a PTE scan */ - unsigned long *numa_faults; + unsigned long *numa_faults_memory; unsigned long total_numa_faults; /* * numa_faults_buffer records faults per node during the current - * scan window. When the scan completes, the counts in numa_faults - * decay and these values are copied. + * scan window. When the scan completes, the counts in + * numa_faults_memory decay and these values are copied. */ - unsigned long *numa_faults_buffer; + unsigned long *numa_faults_buffer_memory; /* * numa_faults_locality tracks if faults recorded during the last -- cgit v1.2.3 From 50ec8a401fed6d246ab65e6011d61ac91c34af70 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:42 -0500 Subject: sched/numa: Track from which nodes NUMA faults are triggered Track which nodes NUMA faults are triggered from, in other words the CPUs on which the NUMA faults happened. This uses a similar mechanism to what is used to track the memory involved in numa faults. The next patches use this to build up a bitmap of which nodes a workload is actively running on. Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-4-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 144d509df053..5fb0cfb43ecf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1479,6 +1479,13 @@ struct task_struct { */ unsigned long *numa_faults_buffer_memory; + /* + * Track the nodes the process was running on when a NUMA hinting + * fault was incurred. + */ + unsigned long *numa_faults_cpu; + unsigned long *numa_faults_buffer_cpu; + /* * numa_faults_locality tracks if faults recorded during the last * scan window were remote/local. The task scan period is adapted @@ -1582,8 +1589,6 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); extern void task_numa_free(struct task_struct *p); - -extern unsigned int sysctl_numa_balancing_migrate_deferred; #else static inline void task_numa_fault(int last_node, int node, int pages, int flags) -- cgit v1.2.3 From 10f39042711ba21773763f267b4943a2c66c8bef Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:44 -0500 Subject: sched/numa, mm: Use active_nodes nodemask to limit numa migrations Use the active_nodes nodemask to make smarter decisions on NUMA migrations. In order to maximize performance of workloads that do not fit in one NUMA node, we want to satisfy the following criteria: 1) keep private memory local to each thread 2) avoid excessive NUMA migration of pages 3) distribute shared memory across the active nodes, to maximize memory bandwidth available to the workload This patch accomplishes that by implementing the following policy for NUMA migrations: 1) always migrate on a private fault 2) never migrate to a node that is not in the set of active nodes for the numa_group 3) always migrate from a node outside of the set of active nodes, to a node that is in that set 4) within the set of active nodes in the numa_group, only migrate from a node with more NUMA page faults, to a node with fewer NUMA page faults, with a 25% margin to avoid ping-ponging This results in most pages of a workload ending up on the actively used nodes, with reduced ping-ponging of pages between those nodes. Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-6-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5fb0cfb43ecf..5ab3b89fc33e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1589,6 +1589,8 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); extern void task_numa_free(struct task_struct *p); +extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, + int src_nid, int dst_cpu); #else static inline void task_numa_fault(int last_node, int node, int pages, int flags) @@ -1604,6 +1606,11 @@ static inline void set_numabalancing_state(bool enabled) static inline void task_numa_free(struct task_struct *p) { } +static inline bool should_numa_migrate_memory(struct task_struct *p, + struct page *page, int src_nid, int dst_cpu) +{ + return true; +} #endif static inline struct pid *task_pid(struct task_struct *task) -- cgit v1.2.3 From 7e2703e6099609adc93679c4d45cd6247f565971 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:45 -0500 Subject: sched/numa: Normalize faults_cpu stats and weigh by CPU use Tracing the code that decides the active nodes has made it abundantly clear that the naive implementation of the faults_from code has issues. Specifically, the garbage collector in some workloads will access orders of magnitudes more memory than the threads that do all the active work. This resulted in the node with the garbage collector being marked the only active node in the group. This issue is avoided if we weigh the statistics by CPU use of each task in the numa group, instead of by how many faults each thread has occurred. To achieve this, we normalize the number of faults to the fraction of faults that occurred on each node, and then multiply that fraction by the fraction of CPU time the task has used since the last time task_numa_placement was invoked. This way the nodes in the active node mask will be the ones where the tasks from the numa group are most actively running, and the influence of eg. the garbage collector and other do-little threads is properly minimized. On a 4 node system, using CPU use statistics calculated over a longer interval results in about 1% fewer page migrations with two 32-warehouse specjbb runs on a 4 node system, and about 5% fewer page migrations, as well as 1% better throughput, with two 8-warehouse specjbb runs, as compared with the shorter term statistics kept by the scheduler. Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-7-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5ab3b89fc33e..ef92953764f2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1459,6 +1459,8 @@ struct task_struct { int numa_preferred_nid; unsigned long numa_migrate_retry; u64 node_stamp; /* migration stamp */ + u64 last_task_numa_placement; + u64 last_sum_exec_runtime; struct callback_head numa_work; struct list_head numa_entry; -- cgit v1.2.3 From da7e6f45c34d39186b72328bacc4dd86bff60e0a Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Fri, 7 Feb 2014 13:36:06 +0530 Subject: time: Change the return type of clockevents_notify() to integer The broadcast framework can potentially be made use of by archs which do not have an external clock device as well. Then, it is required that one of the CPUs need to handle the broadcasting of wakeup IPIs to the CPUs in deep idle. As a result its local timers should remain functional all the time. For such a CPU, the BROADCAST_ENTER notification has to fail indicating that its clock device cannot be shutdown. To make way for this support, change the return type of tick_broadcast_oneshot_control() and hence clockevents_notify() to indicate such scenarios. Signed-off-by: Preeti U Murthy Cc: deepthi@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: fweisbec@gmail.com Cc: paulus@samba.org Cc: srivatsa.bhat@linux.vnet.ibm.com Cc: svaidy@linux.vnet.ibm.com Cc: peterz@infradead.org Cc: benh@kernel.crashing.org Cc: rafael.j.wysocki@intel.com Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20140207080606.17187.78306.stgit@preeti.in.ibm.com Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 493aa021c7a9..e0c5a6c418de 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -186,9 +186,9 @@ static inline int tick_check_broadcast_expired(void) { return 0; } #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS -extern void clockevents_notify(unsigned long reason, void *arg); +extern int clockevents_notify(unsigned long reason, void *arg); #else -static inline void clockevents_notify(unsigned long reason, void *arg) {} +static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } #endif #else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */ @@ -196,7 +196,7 @@ static inline void clockevents_notify(unsigned long reason, void *arg) {} static inline void clockevents_suspend(void) {} static inline void clockevents_resume(void) {} -static inline void clockevents_notify(unsigned long reason, void *arg) {} +static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } #endif -- cgit v1.2.3 From 5d1638acb9f62fa7eb0c07cb85318bbe1f13b227 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Fri, 7 Feb 2014 13:36:32 +0530 Subject: tick: Introduce hrtimer based broadcast On some architectures, in certain CPU deep idle states the local timers stop. An external clock device is used to wakeup these CPUs. The kernel support for the wakeup of these CPUs is provided by the tick broadcast framework by using the external clock device as the wakeup source. However not all implementations of architectures provide such an external clock device. This patch includes support in the broadcast framework to handle the wakeup of the CPUs in deep idle states on such systems by queuing a hrtimer on one of the CPUs, which is meant to handle the wakeup of CPUs in deep idle states. This patchset introduces a pseudo clock device which can be registered by the archs as tick_broadcast_device in the absence of a real external clock device. Once registered, the broadcast framework will work as is for these architectures as long as the archs take care of the BROADCAST_ENTER notification failing for one of the CPUs. This CPU is made the stand by CPU to handle wakeup of the CPUs in deep idle and it *must not enter deep idle states*. The CPU with the earliest wakeup is chosen to be this CPU. Hence this way the stand by CPU dynamically moves around and so does the hrtimer which is queued to trigger at the next earliest wakeup time. This is consistent with the case where an external clock device is present. The smp affinity of this clock device is set to the CPU with the earliest wakeup. This patchset handles the hotplug of the stand by CPU as well by moving the hrtimer on to the CPU handling the CPU_DEAD notification. Originally-from: Thomas Gleixner Signed-off-by: Preeti U Murthy Cc: deepthi@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: fweisbec@gmail.com Cc: paulus@samba.org Cc: srivatsa.bhat@linux.vnet.ibm.com Cc: svaidy@linux.vnet.ibm.com Cc: peterz@infradead.org Cc: benh@kernel.crashing.org Cc: rafael.j.wysocki@intel.com Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20140207080632.17187.80532.stgit@preeti.in.ibm.com Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index e0c5a6c418de..dbe9e1457168 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -62,6 +62,11 @@ enum clock_event_mode { #define CLOCK_EVT_FEAT_DYNIRQ 0x000020 #define CLOCK_EVT_FEAT_PERCPU 0x000040 +/* + * Clockevent device is based on a hrtimer for broadcast + */ +#define CLOCK_EVT_FEAT_HRTIMER 0x000080 + /** * struct clock_event_device - clock event device descriptor * @event_handler: Assigned by the framework to be called by the low @@ -83,6 +88,7 @@ enum clock_event_mode { * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) + * @bound_on: Bound on CPU * @cpumask: cpumask to indicate for which CPUs this device works * @list: list head for the management code * @owner: module reference @@ -113,6 +119,7 @@ struct clock_event_device { const char *name; int rating; int irq; + int bound_on; const struct cpumask *cpumask; struct list_head list; struct module *owner; @@ -180,9 +187,11 @@ extern int tick_receive_broadcast(void); #endif #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); #else static inline int tick_check_broadcast_expired(void) { return 0; } +static void tick_setup_hrtimer_broadcast(void) {}; #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS -- cgit v1.2.3 From f1689bb7abec8e2e670d8ad11eaa86d54bad8cfd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2014 16:00:46 +0100 Subject: time: Fixup fallout from recent clockevent/tick changes Make the stub function static inline instead of static and move the clockevents related function into the proper ifdeffed section. Reported-by: Fengguang Wu Signed-off-by: Thomas Gleixner Cc: Soren Brinkmann Cc: Preeti U Murthy --- include/linux/clockchips.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index dbe9e1457168..20a7183f2831 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -191,7 +191,7 @@ extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); #else static inline int tick_check_broadcast_expired(void) { return 0; } -static void tick_setup_hrtimer_broadcast(void) {}; +static inline void tick_setup_hrtimer_broadcast(void) {}; #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS -- cgit v1.2.3 From 980f88e414418bf65569a3b62b08b07e6fc2f4c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 Feb 2014 17:28:41 +0100 Subject: sched/wait: Suppress Sparse 'variable shadowing' warning This warning seems to show up a lot now, since ___wait_event() is (indirectly) used inside wait_event_timeout(), which also has a variable called __ret. Rename the one in ___wait_event() to ___ret (another leading underscore) to suppress the warning. Signed-off-by: Johannes Berg Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Cc: Andrew Morton Link: http://lkml.kernel.org/r/1391704121.12789.20.camel@jlt4.sipsolutions.net Signed-off-by: Ingo Molnar --- include/linux/wait.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 559044c79232..c55ea5c24404 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -195,7 +195,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); ({ \ __label__ __out; \ wait_queue_t __wait; \ - long __ret = ret; \ + long ___ret = ret; \ \ INIT_LIST_HEAD(&__wait.task_list); \ if (exclusive) \ @@ -210,7 +210,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); break; \ \ if (___wait_is_interruptible(state) && __int) { \ - __ret = __int; \ + ___ret = __int; \ if (exclusive) { \ abort_exclusive_wait(&wq, &__wait, \ state, NULL); \ @@ -222,7 +222,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); cmd; \ } \ finish_wait(&wq, &__wait); \ -__out: __ret; \ +__out: ___ret; \ }) #define __wait_event(wq, condition) \ -- cgit v1.2.3 From 6a02ad66b2c44155d529f430d4fa5c6c66321077 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Feb 2014 18:11:08 +0100 Subject: perf/x86: Push the duration-logging printk() to IRQ context Calling printk() from NMI context is bad (TM), so move it to IRQ context. This also avoids the problem where the printk() time is measured by the generic NMI duration goo and triggers a second warning. Signed-off-by: Peter Zijlstra Cc: Don Zickus Cc: Dave Hansen Link: http://lkml.kernel.org/n/tip-75dv35xf6dhhmeb7nq6fua31@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/irq_work.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 66017028dcb3..add13c8624b7 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -30,6 +30,8 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) work->func = func; } +#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), } + void irq_work_queue(struct irq_work *work); void irq_work_run(void); void irq_work_sync(struct irq_work *work); -- cgit v1.2.3 From 5c228079ce8a9bb043a423069a6674dfb9268037 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Mon, 27 Jan 2014 17:15:37 -0500 Subject: sched: Move the priority specific bits into a new header file Some bits about priority are defined in linux/sched/rt.h, but some of them are not only for rt scheduler, such as MAX_PRIO. This patch move them all into a new header file, linux/sched/prio.h. Signed-off-by: Dongsheng Yang Cc: clark.williams@gmail.com Cc: rostedt@goodmis.org Cc: raistlin@linux.it Cc: juri.lelli@gmail.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/f7549508a1588da2c613d601748ca9de30fa5dcf.1390859827.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ include/linux/sched/prio.h | 23 +++++++++++++++++++++++ include/linux/sched/rt.h | 19 +------------------ 3 files changed, 26 insertions(+), 18 deletions(-) create mode 100644 include/linux/sched/prio.h (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ed867797fe5a..d97d0a8e87dc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3,6 +3,8 @@ #include +#include + struct sched_param { int sched_priority; diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h new file mode 100644 index 000000000000..9382ba84d5d0 --- /dev/null +++ b/include/linux/sched/prio.h @@ -0,0 +1,23 @@ +#ifndef _SCHED_PRIO_H +#define _SCHED_PRIO_H + +/* + * Priority of a process goes from 0..MAX_PRIO-1, valid RT + * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH + * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority + * values are inverted: lower p->prio value means higher priority. + * + * The MAX_USER_RT_PRIO value allows the actual maximum + * RT priority to be separate from the value exported to + * user-space. This allows kernel threads to set their + * priority to a value higher than any user task. Note: + * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. + */ + +#define MAX_USER_RT_PRIO 100 +#define MAX_RT_PRIO MAX_USER_RT_PRIO + +#define MAX_PRIO (MAX_RT_PRIO + 40) +#define DEFAULT_PRIO (MAX_RT_PRIO + 20) + +#endif /* _SCHED_PRIO_H */ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 34e4ebea8fce..f7453d4c5613 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -1,24 +1,7 @@ #ifndef _SCHED_RT_H #define _SCHED_RT_H -/* - * Priority of a process goes from 0..MAX_PRIO-1, valid RT - * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH - * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority - * values are inverted: lower p->prio value means higher priority. - * - * The MAX_USER_RT_PRIO value allows the actual maximum - * RT priority to be separate from the value exported to - * user-space. This allows kernel threads to set their - * priority to a value higher than any user task. Note: - * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. - */ - -#define MAX_USER_RT_PRIO 100 -#define MAX_RT_PRIO MAX_USER_RT_PRIO - -#define MAX_PRIO (MAX_RT_PRIO + 40) -#define DEFAULT_PRIO (MAX_RT_PRIO + 20) +#include static inline int rt_prio(int prio) { -- cgit v1.2.3 From 6b6350f155afdfdf888e18c7bf26950a6d10b0c2 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Mon, 27 Jan 2014 17:15:38 -0500 Subject: sched: Expose some macros related to priority Some macros in kernel/sched/sched.h about priority are private to kernel/sched. But they are useful to other parts of the core kernel. This patch moves these macros from kernel/sched/sched.h to include/linux/sched/prio.h so that they are available to other subsystems. Signed-off-by: Dongsheng Yang Cc: raistlin@linux.it Cc: juri.lelli@gmail.com Cc: clark.williams@gmail.com Cc: rostedt@goodmis.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/2b022810905b52d13238466807f4b2a691577180.1390859827.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- include/linux/sched/prio.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index 9382ba84d5d0..13216f16762e 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -20,4 +20,22 @@ #define MAX_PRIO (MAX_RT_PRIO + 40) #define DEFAULT_PRIO (MAX_RT_PRIO + 20) +/* + * Convert user-nice values [ -20 ... 0 ... 19 ] + * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], + * and back. + */ +#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) +#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) +#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) + +/* + * 'User priority' is the nice value converted to something we + * can work with better when scaling various scheduler parameters, + * it's a [ 0 ... 39 ] range. + */ +#define USER_PRIO(p) ((p)-MAX_RT_PRIO) +#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) +#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) + #endif /* _SCHED_PRIO_H */ -- cgit v1.2.3 From 849401b66d305f3feb75b6db7459b95ad190552a Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Sun, 9 Feb 2014 11:32:22 +0530 Subject: tick: Fixup more fallout from hrtimer broadcast mode The hrtimer mode of broadcast is supported only when GENERIC_CLOCKEVENTS_BROADCAST and TICK_ONESHOT config options are enabled. Hence compile in the functions for hrtimer mode of broadcast only when these options are selected. Also fix max_delta_ticks value for the pseudo clock device. Reported-by: Fengguang Wu Reported-by: Ingo Molnar Signed-off-by: Preeti U Murthy Link: http://lkml.kernel.org/r/52F719EE.9010304@linux.vnet.ibm.com Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 20a7183f2831..2e4cb67f6e56 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -207,6 +207,7 @@ static inline void clockevents_resume(void) {} static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } +static inline void tick_setup_hrtimer_broadcast(void) {}; #endif -- cgit v1.2.3 From d0ea026808ad81de2af14938448419a95211b938 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Mon, 27 Jan 2014 22:00:45 -0500 Subject: sched: Implement task_nice() as static inline function As patch "sched: Move the priority specific bits into a new header file" exposes the priority related macros in linux/sched/prio.h, we don't have to implement task_nice() in kernel/sched/core.c any more. This patch implements it in linux/sched/sched.h as static inline function, saving the kernel stack and enhancing performance a bit. Signed-off-by: Dongsheng Yang Cc: clark.williams@gmail.com Cc: rostedt@goodmis.org Cc: raistlin@linux.it Cc: juri.lelli@gmail.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1390878045-7096-1-git-send-email-yangds.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 ++++++++++- include/linux/sched/prio.h | 1 - 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d97d0a8e87dc..e3d556427b2e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2094,7 +2094,16 @@ static inline void sched_autogroup_exit(struct signal_struct *sig) { } extern bool yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); -extern int task_nice(const struct task_struct *p); +/** + * task_nice - return the nice value of a given task. + * @p: the task in question. + * + * Return: The nice value [ -20 ... 0 ... 19 ]. + */ +static inline int task_nice(const struct task_struct *p) +{ + return PRIO_TO_NICE((p)->static_prio); +} extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index 13216f16762e..410ccb74c9e6 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -27,7 +27,6 @@ */ #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) #define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) -#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) /* * 'User priority' is the nice value converted to something we -- cgit v1.2.3 From ddf1d169c0a489d498c1799a7043904a43b0c159 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Tue, 21 Jan 2014 15:36:22 -0800 Subject: locking/mcs: Allow architecture specific asm files to be used for contended case This patch allows each architecture to add its specific assembly optimized arch_mcs_spin_lock_contended and arch_mcs_spinlock_uncontended for MCS lock and unlock functions. Signed-off-by: Tim Chen Cc: Scott J Norton Cc: Raghavendra K T Cc: AswinChandramouleeswaran Cc: George Spelvin Cc: Rik vanRiel Cc: Andrea Arcangeli Cc: MichelLespinasse Cc: Peter Hurley Cc: Andi Kleen Cc: Alex Shi Cc: Dave Hansen Cc: Tim Chen Cc: Arnd Bergmann Cc: "Figo.zhang" Cc: "Paul E.McKenney" Cc: "H. Peter Anvin" Cc: Davidlohr Bueso Cc: Waiman Long Cc: Ingo Molnar Cc: Will Deacon Cc: Andrew Morton Cc: Linus Torvalds Cc: Matthew R Wilcox Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1390347382.3138.67.camel@schen9-DESK Signed-off-by: Ingo Molnar --- include/linux/mcs_spinlock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index e9a4d74c63dc..f2a5c6360083 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -12,6 +12,8 @@ #ifndef __LINUX_MCS_SPINLOCK_H #define __LINUX_MCS_SPINLOCK_H +#include + struct mcs_spinlock { struct mcs_spinlock *next; int locked; /* 1 if lock acquired */ -- cgit v1.2.3 From fb9edbe98493fcd9df66de926ae9157cbe0e4dcd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 20 Jan 2014 19:20:06 +0100 Subject: lockdep: Make held_lock->check and "int check" argument bool The "int check" argument of lock_acquire() and held_lock->check are misleading. This is actually a boolean: 2 means "true", everything else is "false". And there is no need to pass 1 or 0 to lock_acquire() depending on CONFIG_PROVE_LOCKING, __lock_acquire() checks prove_locking at the start and clears "check" if !CONFIG_PROVE_LOCKING. Note: probably we can simply kill this member/arg. The only explicit user of check => 0 is rcu_lock_acquire(), perhaps we can change it to use lock_acquire(trylock =>, read => 2). __lockdep_no_validate means check => 0 implicitly, but we can change validate_chain() to check hlock->instance->key instead. Not to mention it would be nice to get rid of lockdep_set_novalidate_class(). Signed-off-by: Oleg Nesterov Cc: Dave Jones Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul McKenney Cc: Steven Rostedt Cc: Alan Stern Cc: Sasha Levin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140120182006.GA26495@redhat.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 25 +++++++++---------------- include/linux/rcupdate.h | 2 +- 2 files changed, 10 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 92b1bfc5da60..1626047c1f26 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -252,9 +252,9 @@ struct held_lock { unsigned int trylock:1; /* 16 bits */ unsigned int read:2; /* see lock_acquire() comment */ - unsigned int check:2; /* see lock_acquire() comment */ + unsigned int check:1; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; - unsigned int references:11; /* 32 bits */ + unsigned int references:12; /* 32 bits */ }; /* @@ -326,9 +326,8 @@ static inline int lockdep_match_key(struct lockdep_map *lock, * * Values for check: * - * 0: disabled - * 1: simple checks (freeing, held-at-exit-time, etc.) - * 2: full validation + * 0: simple checks (freeing, held-at-exit-time, etc.) + * 1: full validation */ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, @@ -479,15 +478,9 @@ static inline void print_irqtrace_events(struct task_struct *curr) * on the per lock-class debug mode: */ -#ifdef CONFIG_PROVE_LOCKING - #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) - #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 2, n, i) - #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 2, n, i) -#else - #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) - #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i) - #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i) -#endif +#define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) +#define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i) +#define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i) #define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) @@ -518,13 +511,13 @@ static inline void print_irqtrace_events(struct task_struct *curr) # define might_lock(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ - lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \ + lock_acquire(&(lock)->dep_map, 0, 0, 0, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ } while (0) # define might_lock_read(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ - lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \ + lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ } while (0) #else diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 72bf3a01a4ee..adff3c99dcaa 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -314,7 +314,7 @@ static inline bool rcu_lockdep_current_cpu_online(void) static inline void rcu_lock_acquire(struct lockdep_map *map) { - lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); + lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_); } static inline void rcu_lock_release(struct lockdep_map *map) -- cgit v1.2.3 From 47be1c1a0e188232b5e5962917b21750053cd3f8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 20 Jan 2014 19:20:16 +0100 Subject: lockdep: Change lockdep_set_novalidate_class() to use _and_name Cosmetic. This doesn't really matter because a) device->mutex is the only user of __lockdep_no_validate__ and b) this class should be never reported as the source of problem, but if something goes wrong "&dev->mutex" looks better than "&__lockdep_no_validate__" as the name of the lock. Signed-off-by: Oleg Nesterov Cc: Dave Jones Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul McKenney Cc: Steven Rostedt Cc: Alan Stern Cc: Sasha Levin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140120182016.GA26512@redhat.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1626047c1f26..060e5137fd80 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -303,7 +303,7 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, (lock)->dep_map.key, sub) #define lockdep_set_novalidate_class(lock) \ - lockdep_set_class(lock, &__lockdep_no_validate__) + lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) /* * Compare locking classes */ -- cgit v1.2.3 From fed14d45f945042a15b09de48d7d3d58d9455fc4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 11 Feb 2012 06:05:00 +0100 Subject: sched/fair: Track cgroup depth Track depth in cgroup tree, this is useful for things like find_matching_se() where you need to get to a common parent of two sched entities. Keeping the depth avoids having to calculate it on the spot, which saves a number of possible cache-misses. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1328936700.2476.17.camel@laptop Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index e3d556427b2e..555e27d717c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1078,6 +1078,7 @@ struct sched_entity { #endif #ifdef CONFIG_FAIR_GROUP_SCHED + int depth; struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ struct cfs_rq *cfs_rq; -- cgit v1.2.3 From d47d5c8194579bce1d62f88e26fea91d7c553e42 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:51:58 +0100 Subject: asmlinkage: Make __iowrite32_copy visible This is a assembler function on x86, so it should be visible. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-2-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin --- include/linux/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index f4f42faec686..8a18e75600cc 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -24,7 +24,7 @@ struct device; -void __iowrite32_copy(void __iomem *to, const void *from, size_t count); +__visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count); void __iowrite64_copy(void __iomem *to, const void *from, size_t count); #ifdef CONFIG_MMU -- cgit v1.2.3 From 63f9a7fde715352e0769302527670542a664b981 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:01 +0100 Subject: asmlinkage: Make lockdep_sys_exit asmlinkage lockdep_sys_exit can be called from assembler code, so make it asmlinkage. Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-5-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin --- include/linux/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 92b1bfc5da60..7df9aa6902c0 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -265,7 +265,7 @@ extern void lockdep_info(void); extern void lockdep_reset(void); extern void lockdep_reset_lock(struct lockdep_map *lock); extern void lockdep_free_key_range(void *start, unsigned long size); -extern void lockdep_sys_exit(void); +extern asmlinkage void lockdep_sys_exit(void); extern void lockdep_off(void); extern void lockdep_on(void); -- cgit v1.2.3 From 128ea04a9885af9629059e631ddf0cab4815b589 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:07 +0100 Subject: lto: Make asmlinkage __visible Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-3-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin --- include/linux/linkage.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index a6a42dd02466..34a513a2727b 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -12,9 +12,9 @@ #endif #ifdef __cplusplus -#define CPP_ASMLINKAGE extern "C" +#define CPP_ASMLINKAGE extern "C" __visible #else -#define CPP_ASMLINKAGE +#define CPP_ASMLINKAGE __visible #endif #ifndef asmlinkage -- cgit v1.2.3 From ef1b893c29d0dba778f67ad97b554b37f9108dcc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:08 +0100 Subject: lto, workaround: Add workaround for initcall reordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Work around a LTO gcc problem: when there is no reference to a variable in a module it will be moved to the end of the program. This causes reordering of initcalls which the kernel does not like. Add a dummy reference function to avoid this. The function is deleted by the linker. This replaces a previous much slower workaround. Thanks to Jan "Honza" Hubička for suggesting this technique. Suggested-by: Jan Hubička Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-4-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin --- include/linux/init.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index e1688802964f..a3ba27076342 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -163,6 +163,23 @@ extern bool initcall_debug; #ifndef __ASSEMBLY__ +#ifdef CONFIG_LTO +/* Work around a LTO gcc problem: when there is no reference to a variable + * in a module it will be moved to the end of the program. This causes + * reordering of initcalls which the kernel does not like. + * Add a dummy reference function to avoid this. The function is + * deleted by the linker. + */ +#define LTO_REFERENCE_INITCALL(x) \ + ; /* yes this is needed */ \ + static __used __exit void *reference_##x(void) \ + { \ + return &x; \ + } +#else +#define LTO_REFERENCE_INITCALL(x) +#endif + /* initcalls are now grouped by functionality into separate * subsections. Ordering inside the subsections is determined * by link order. @@ -175,7 +192,8 @@ extern bool initcall_debug; #define __define_initcall(fn, id) \ static initcall_t __initcall_##fn##id __used \ - __attribute__((__section__(".initcall" #id ".init"))) = fn + __attribute__((__section__(".initcall" #id ".init"))) = fn; \ + LTO_REFERENCE_INITCALL(__initcall_##fn##id) /* * Early initcalls run before initializing SMP. -- cgit v1.2.3 From 478b360a47b71f3b5030eacd3aae6acb1a32c2b6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 15 Feb 2014 23:48:45 +0100 Subject: netfilter: nf_tables: fix nf_trace always-on with XT_TRACE=n When using nftables with CONFIG_NETFILTER_XT_TARGET_TRACE=n, we get lots of "TRACE: filter:output:policy:1 IN=..." warnings as several places will leave skb->nf_trace uninitialised. Unlike iptables tracing functionality is not conditional in nftables, so always copy/zero nf_trace setting when nftables is enabled. Move this into __nf_copy() helper. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f589c9af8cbf..d40d40b2915b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2725,7 +2725,7 @@ static inline void nf_reset(struct sk_buff *skb) static inline void nf_reset_trace(struct sk_buff *skb) { -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) skb->nf_trace = 0; #endif } @@ -2742,6 +2742,9 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) dst->nf_bridge = src->nf_bridge; nf_bridge_get(src->nf_bridge); #endif +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) + dst->nf_trace = src->nf_trace; +#endif } static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) -- cgit v1.2.3 From 87de1cfdc55b16b794e245b07322340725149d62 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Dec 2013 10:02:52 -0800 Subject: rcu: Stop tracking FSF's postal address All of the RCU source files have the usual GPL header, which contains a long-obsolete postal address for FSF. To avoid the need to track the FSF office's movements, this commit substitutes the URL where GPL may be found. Reported-by: Greg KH Reported-by: Steven Rostedt Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 4 ++-- include/linux/rcutiny.h | 4 ++-- include/linux/rcutree.h | 4 ++-- include/linux/srcu.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 72bf3a01a4ee..b200756ea4c0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2001 * diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 6f01771b571c..c364e9148de2 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2008 * diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 72137ee8c603..08b084068967 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2008 * diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 9b058eecd403..a2783cb5d275 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright (C) IBM Corporation, 2006 * Copyright (C) Fujitsu, 2012 -- cgit v1.2.3 From 41f4abd92a34f9c5110bbb870c04f8854604e28d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 5 Dec 2013 15:10:23 -0800 Subject: rcu: Glue ASCII strings together Split strings make it difficult to find the code that resulted in a given console message, so this commit glues split strings back together despite the resulting long lines. Signed-off-by: Joe Perches Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b200756ea4c0..d946d3660633 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -479,11 +479,9 @@ static inline void rcu_preempt_sleep_check(void) do { \ rcu_preempt_sleep_check(); \ rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ - "Illegal context switch in RCU-bh" \ - " read-side critical section"); \ + "Illegal context switch in RCU-bh read-side critical section"); \ rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ - "Illegal context switch in RCU-sched"\ - " read-side critical section"); \ + "Illegal context switch in RCU-sched read-side critical section"); \ } while (0) #else /* #ifdef CONFIG_PROVE_RCU */ @@ -518,16 +516,14 @@ static inline void rcu_preempt_sleep_check(void) #define __rcu_dereference_check(p, c, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \ - " usage"); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_check() usage"); \ rcu_dereference_sparse(p, space); \ smp_read_barrier_depends(); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \ - " usage"); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_protected() usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -542,8 +538,7 @@ static inline void rcu_preempt_sleep_check(void) ({ \ typeof(p) _________p1 = ACCESS_ONCE(p); \ rcu_lockdep_assert(c, \ - "suspicious rcu_dereference_index_check()" \ - " usage"); \ + "suspicious rcu_dereference_index_check() usage"); \ smp_read_barrier_depends(); \ (_________p1); \ }) -- cgit v1.2.3 From 0adab9b9aa18d7e90337d43567f1eec3d5401b81 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 5 Dec 2013 16:19:15 -0800 Subject: rcu: Indentation and spacing fixes. This commit outdents expression-statement macros, thus repairing a few line-length complaints. Also fix some spacing errors called out by checkpatch.pl. Signed-off-by: Joe Perches Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rculist.h | 17 +++++++------- include/linux/rcupdate.h | 58 ++++++++++++++++++++++++------------------------ 2 files changed, 38 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index dbaf99084112..8183b46fbaa2 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -247,9 +247,10 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ - ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ - container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ - }) +({ \ + typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \ + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ +}) /** * Where are list_empty_rcu() and list_first_entry_rcu()? @@ -285,11 +286,11 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_first_or_null_rcu(ptr, type, member) \ - ({struct list_head *__ptr = (ptr); \ - struct list_head *__next = ACCESS_ONCE(__ptr->next); \ - likely(__ptr != __next) ? \ - list_entry_rcu(__next, type, member) : NULL; \ - }) +({ \ + struct list_head *__ptr = (ptr); \ + struct list_head *__next = ACCESS_ONCE(__ptr->next); \ + likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \ +}) /** * list_for_each_entry_rcu - iterate over rcu list of given type diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d946d3660633..278a9da69ec4 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -508,40 +508,40 @@ static inline void rcu_preempt_sleep_check(void) #endif /* #else #ifdef __CHECKER__ */ #define __rcu_access_pointer(p, space) \ - ({ \ - typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - rcu_dereference_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ - }) +({ \ + typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \ + rcu_dereference_sparse(p, space); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ +}) #define __rcu_dereference_check(p, c, space) \ - ({ \ - typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_check() usage"); \ - rcu_dereference_sparse(p, space); \ - smp_read_barrier_depends(); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ - }) +({ \ + typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_check() usage"); \ + rcu_dereference_sparse(p, space); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + ((typeof(*p) __force __kernel *)(_________p1)); \ +}) #define __rcu_dereference_protected(p, c, space) \ - ({ \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_protected() usage"); \ - rcu_dereference_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(p)); \ - }) +({ \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_protected() usage"); \ + rcu_dereference_sparse(p, space); \ + ((typeof(*p) __force __kernel *)(p)); \ +}) #define __rcu_access_index(p, space) \ - ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - rcu_dereference_sparse(p, space); \ - (_________p1); \ - }) +({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + rcu_dereference_sparse(p, space); \ + (_________p1); \ +}) #define __rcu_dereference_index_check(p, c) \ - ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - rcu_lockdep_assert(c, \ - "suspicious rcu_dereference_index_check() usage"); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) +({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + rcu_lockdep_assert(c, \ + "suspicious rcu_dereference_index_check() usage"); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + (_________p1); \ +}) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable -- cgit v1.2.3 From 88c1863066ccfa456797e12c5d8b4631aa1ad0d0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Dec 2013 13:24:32 -0800 Subject: rcu: Define rcu_assign_pointer() in terms of smp_store_release() The new smp_store_release() function provides better guarantees than did rcu_assign_pointer(), and potentially less overhead on some architectures. The guarantee that smp_store_release() provides that rcu_assign_pointer() does that is obscure, but its lack could cause considerable confusion. This guarantee is illustrated by the following code fragment: struct foo { int a; int b; int c; struct foo *next; }; struct foo foo1; struct foo foo2; struct foo __rcu *foop; ... foo2.a = 1; foo2.b = 2; BUG_ON(foo2.c); rcu_assign_pointer(foop, &foo); ... fp = rcu_dereference(foop); fp.c = 3; The current rcu_assign_pointer() semantics permit the BUG_ON() to trigger because rcu_assign_pointer()'s smp_wmb() is not guaranteed to order prior reads against later writes. This commit therefore upgrades rcu_assign_pointer() from smp_wmb() to smp_store_release() to avoid this counter-intuitive outcome. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 278a9da69ec4..32decf1a9c6c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -44,6 +44,7 @@ #include #include #include +#include #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; /* for sysctl */ @@ -580,12 +581,7 @@ static inline void rcu_preempt_sleep_check(void) * please be careful when making changes to rcu_assign_pointer() and the * other macros that it invokes. */ -#define rcu_assign_pointer(p, v) \ - do { \ - smp_wmb(); \ - ACCESS_ONCE(p) = RCU_INITIALIZER(v); \ - } while (0) - +#define rcu_assign_pointer(p, v) smp_store_release(&p, RCU_INITIALIZER(v)) /** * rcu_access_pointer() - fetch RCU pointer with no dereferencing -- cgit v1.2.3 From 2f33b512a5460578f6cf11d7b7867bed53157c7c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 17 Nov 2013 18:25:48 -0800 Subject: rcu: Optimize rcu_is_nocb_cpu() for RCU_NOCB_CPU_ALL If CONFIG_RCU_NOCB_CPU_ALL=y, then rcu_is_nocb_cpu() will always return true, however, the current version nevertheless checks rcu_nocb_mask. This commit therefore creates a static inline implementation of rcu_is_nocb_cpu() that unconditionally returns true when CONFIG_RCU_NOCB_CPU_ALL=y. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 72bf3a01a4ee..281c90f8989e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1015,11 +1015,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) -#ifdef CONFIG_RCU_NOCB_CPU +#if defined(CONFIG_RCU_NOCB_CPU_ALL) +static inline bool rcu_is_nocb_cpu(int cpu) { return true; } +#elif defined(CONFIG_RCU_NOCB_CPU) bool rcu_is_nocb_cpu(int cpu); #else static inline bool rcu_is_nocb_cpu(int cpu) { return false; } -#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ +#endif /* Only for use by adaptive-ticks code. */ -- cgit v1.2.3 From ffa83fb565fbc397cbafb4b71fd1cce276d4c3b6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 17 Nov 2013 19:27:16 -0800 Subject: rcu: Optimize rcu_needs_cpu() for RCU_NOCB_CPU_ALL If CONFIG_RCU_NOCB_CPU_ALL=y, then rcu_needs_cpu() will always return false, however, the current version nevertheless checks for RCU callbacks. This commit therefore creates a static inline implementation of rcu_needs_cpu() that unconditionally returns false when CONFIG_RCU_NOCB_CPU_ALL=y. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 8 ++++++++ include/linux/rcutiny.h | 6 ------ include/linux/rcutree.h | 2 ++ 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 281c90f8989e..5f7d5f410d50 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1015,6 +1015,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) +#if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) +{ + *delta_jiffies = ULONG_MAX; + return 0; +} +#endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */ + #if defined(CONFIG_RCU_NOCB_CPU_ALL) static inline bool rcu_is_nocb_cpu(int cpu) { return true; } #elif defined(CONFIG_RCU_NOCB_CPU) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 6f01771b571c..9524903487d0 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -68,12 +68,6 @@ static inline void kfree_call_rcu(struct rcu_head *head, call_rcu(head, func); } -static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) -{ - *delta_jiffies = ULONG_MAX; - return 0; -} - static inline void rcu_note_context_switch(int cpu) { rcu_sched_qs(cpu); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 72137ee8c603..81198c84e268 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -31,7 +31,9 @@ #define __LINUX_RCUTREE_H void rcu_note_context_switch(int cpu); +#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies); +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ void rcu_cpu_stall_reset(void); /* -- cgit v1.2.3 From 45a22f4c11fef4ecd5c61c0a299cd3f23d77be8e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 17 Feb 2014 13:09:50 +0100 Subject: inotify: Fix reporting of cookies for inotify events My rework of handling of notification events (namely commit 7053aee26a35 "fsnotify: do not share events between notification groups") broke sending of cookies with inotify events. We didn't propagate the value passed to fsnotify() properly and passed 4 uninitialized bytes to userspace instead (so it is also an information leak). Sadly I didn't notice this during my testing because inotify cookies aren't used very much and LTP inotify tests ignore them. Fix the problem by passing the cookie value properly. Fixes: 7053aee26a3548ebaba046ae2e52396ccf56ac6c Reported-by: Vegard Nossum Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 3d286ff49ab0..c84bc7c2bfc8 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -99,7 +99,7 @@ struct fsnotify_ops { struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name); + const unsigned char *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); -- cgit v1.2.3 From 90d88bd75424dff51e2072fd2f8fa85ee893aa17 Mon Sep 17 00:00:00 2001 From: Tan Xiaojun Date: Sat, 15 Feb 2014 13:19:51 +0800 Subject: workqueue: Remove deprecated __cancel_delayed_work() __cancel_delayed_work() was deprecated by 136b5721d75a ("workqueue: deprecate __cancel_delayed_work()") as cancel_delayed_work() was updated so that it could be used from all contexts. Enough time has passed since the deprecation. Let's remove it. tj: description update Signed-off-by: Tan Xiaojun Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 594521ba0d43..edc941049d79 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -605,21 +605,6 @@ static inline bool keventd_up(void) return system_wq != NULL; } -/* - * Like above, but uses del_timer() instead of del_timer_sync(). This means, - * if it returns 0 the timer function may be running and the queueing is in - * progress. - */ -static inline bool __deprecated __cancel_delayed_work(struct delayed_work *work) -{ - bool ret; - - ret = del_timer(&work->timer); - if (ret) - work_clear_pending(&work->work); - return ret; -} - /* used to be different but now identical to flush_work(), deprecated */ static inline bool __deprecated flush_work_sync(struct work_struct *work) { -- cgit v1.2.3 From 18258f7239a61d8929b8e0c7b6d46c446459074c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 15 Feb 2014 00:55:18 +0000 Subject: genirq: Provide synchronize_hardirq() synchronize_irq() waits for hard irq and threaded handlers to complete before returning. For some special cases we only need to make sure that the hard interrupt part of the irq line is not in progress when we disabled the - possibly shared - interrupt at the device level. A proper use case for this was provided by Russell. The sdhci driver requires some irq triggered functions to be run in thread context. The current implementation of the thread context is a sdio private kthread construct, which has quite some shortcomings. These can be avoided when the thread is directly associated to the device interrupt via the generic threaded irq infrastructure. Though there is a corner case related to run time power management where one side disables the device interrupts at the device level and needs to make sure, that an already running hard interrupt handler has completed before proceeding further. Though that hard interrupt handler might wake the associated thread, which in turn can request the runtime PM to reenable the device. Using synchronize_irq() leads to an immediate deadlock of the irq thread waiting for the PM lock and the synchronize_irq() waiting for the irq thread to complete. Due to the fact that it is sufficient for this case to ensure that no hard irq handler is executing a new function which avoids the check for the thread is required. Add a function, which just monitors the hard irq parts and ignores the threaded handlers. Signed-off-by: Thomas Gleixner Tested-by: Russell King Cc: Chris Ball Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140215003823.653236081@linutronix.de --- include/linux/hardirq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 12d5f972f23f..cba442ec3c66 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -9,6 +9,7 @@ extern void synchronize_irq(unsigned int irq); +extern void synchronize_hardirq(unsigned int irq); #if defined(CONFIG_TINY_RCU) -- cgit v1.2.3 From a92444c6b2225a9115d661c950cb48a22aeace20 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 15 Feb 2014 00:55:19 +0000 Subject: genirq: Provide irq_wake_thread() In course of the sdhci/sdio discussion with Russell about killing the sdio kthread hackery we discovered the need to be able to wake an interrupt thread from software. The rationale for this is, that sdio hardware can lack proper interrupt support for certain features. So the driver needs to poll the status registers, but at the same time it needs to be woken up by an hardware interrupt. To be able to get rid of the home brewn kthread construct of sdio we need a way to wake an irq thread independent of an actual hardware interrupt. Provide an irq_wake_thread() function which wakes up the thread which is associated to a given dev_id. This allows sdio to invoke the irq thread from the hardware irq handler via the IRQ_WAKE_THREAD return value and provides a possibility to wake it via a timer for the polling scenarios. That allows to simplify the sdio logic significantly. Signed-off-by: Thomas Gleixner Cc: Russell King Cc: Chris Ball Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140215003823.772565780@linutronix.de --- include/linux/interrupt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a2678d35b5a2..c7bfac1c4a7b 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -188,6 +188,7 @@ extern void disable_irq(unsigned int irq); extern void disable_percpu_irq(unsigned int irq); extern void enable_irq(unsigned int irq); extern void enable_percpu_irq(unsigned int irq, unsigned int type); +extern void irq_wake_thread(unsigned int irq, void *dev_id); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); -- cgit v1.2.3 From 994c41ee0ac875797b4dfef509ac7753e2649b4d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 30 Jan 2014 13:17:20 +0200 Subject: ARM: OMAP2+: clock: fix clkoutx2 with CLK_SET_RATE_PARENT If CLK_SET_RATE_PARENT is set for a clkoutx2 clock, calling clk_set_rate() on the clock "skips" the x2 multiplier as there are no set_rate and round_rate functions defined for the clkoutx2. This results in getting double the requested clock rates, breaking the display on omap3430 based devices. This got broken when d0f58bd3bba3877fb1af4664c4e33273d36f00e4 and related patches were merged for v3.14, as omapdss driver now relies more on the clk-framework and CLK_SET_RATE_PARENT. This patch implements set_rate and round_rate for clkoutx2. Tested on OMAP3430, OMAP3630, OMAP4460. Signed-off-by: Tomi Valkeinen Acked-by: Tero Kristo Signed-off-by: Paul Walmsley --- include/linux/clk/ti.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 092b64168d7f..4a21a872dbbd 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -245,6 +245,10 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, void omap2_init_clk_clkdm(struct clk_hw *clk); unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, unsigned long parent_rate); +int omap3_clkoutx2_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate); +long omap3_clkoutx2_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate); int omap2_clkops_enable_clkdm(struct clk_hw *hw); void omap2_clkops_disable_clkdm(struct clk_hw *hw); int omap2_clk_disable_autoidle_all(void); -- cgit v1.2.3 From 634391ace193d00c59a691e9fc227b0f8942bad7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 13 Feb 2014 13:53:33 +0100 Subject: mm: mask bits from pmd in pmd_lockptr/pmd_huge_pte The pmd pointer passed to pmd_lockptr/pmd_huge_pte can point to any entry in a pmd table. With USE_SPLIT_PMD_PTLOCKS==1 the code uses virt_to_page to get a struct page for the pmd table. The virt_to_page function automatically masks the lower PAGE_SHIFT bits from the address. But if the size of a pmd table is larger than PAGE_SIZE the additional bits are not removed from the pmd address and the wrong page struct is used. Fix this by explicitely masking the offset in the pmd table from the pmd pointer. Signed-off-by: Martin Schwidefsky --- include/linux/mm.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f28f46eade6a..d354a72e6127 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1477,9 +1477,15 @@ static inline void pgtable_page_dtor(struct page *page) #if USE_SPLIT_PMD_PTLOCKS +static struct page *pmd_to_page(pmd_t *pmd) +{ + unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + return virt_to_page((void *)((unsigned long) pmd & mask)); +} + static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return ptlock_ptr(virt_to_page(pmd)); + return ptlock_ptr(pmd_to_page(pmd)); } static inline bool pgtable_pmd_page_ctor(struct page *page) @@ -1498,7 +1504,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page) ptlock_free(page); } -#define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte) +#define pmd_huge_pte(mm, pmd) (pmd_to_page(pmd)->pmd_huge_pte) #else -- cgit v1.2.3 From feb71dae1f9e0aeb056f7f639a21e620d327fc66 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Feb 2014 15:32:37 -0800 Subject: blk-mq: merge blk_mq_insert_request and blk_mq_run_request It's almost identical to blk_mq_insert_request, so fold the two into one slightly more generic function by making the flush special case a bit smarted. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 18ba8a627f46..ff28fe37ddda 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -121,8 +121,7 @@ void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struc void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -void blk_mq_insert_request(struct request_queue *, struct request *, - bool, bool); +void blk_mq_insert_request(struct request *, bool, bool, bool); void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -- cgit v1.2.3 From d6a25b31315327eef7785b895c354cc45c3f3742 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Feb 2014 15:32:38 -0800 Subject: blk-mq: support partial I/O completions Add a new blk_mq_end_io_partial function to partially complete requests as needed by the SCSI layer. We do this by reusing blk_update_request to advance the bio instead of having a simplified version of it in the blk-mq code. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ff28fe37ddda..2ff2e8d982be 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -133,7 +133,13 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_ind struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int); void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); -void blk_mq_end_io(struct request *rq, int error); +bool blk_mq_end_io_partial(struct request *rq, int error, + unsigned int nr_bytes); +static inline void blk_mq_end_io(struct request *rq, int error) +{ + bool done = !blk_mq_end_io_partial(rq, error, blk_rq_bytes(rq)); + BUG_ON(!done); +} void blk_mq_complete_request(struct request *rq); -- cgit v1.2.3 From cd578abb24aa67ce468c427d3356c08ea32cf768 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 11 Feb 2014 16:01:16 +0100 Subject: perf/x86: Warn to early_printk() in case irq_work is too slow On Mon, Feb 10, 2014 at 08:45:16AM -0800, Dave Hansen wrote: > The reason I coded this up was that NMIs were firing off so fast that > nothing else was getting a chance to run. With this patch, at least the > printk() would come out and I'd have some idea what was going on. It will start spewing to early_printk() (which is a lot nicer to use from NMI context too) when it fails to queue the IRQ-work because its already enqueued. It does have the false-positive for when two CPUs trigger the warn concurrently, but that should be rare and some extra clutter on the early printk shouldn't be a problem. Cc: hpa@zytor.com Cc: tglx@linutronix.de Cc: dzickus@redhat.com Cc: Dave Hansen Cc: mingo@kernel.org Fixes: 6a02ad66b2c4 ("perf/x86: Push the duration-logging printk() to IRQ context") Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140211150116.GO27965@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- include/linux/irq_work.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index add13c8624b7..19ae05d4b8ec 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -32,7 +32,7 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) #define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), } -void irq_work_queue(struct irq_work *work); +bool irq_work_queue(struct irq_work *work); void irq_work_run(void); void irq_work_sync(struct irq_work *work); -- cgit v1.2.3 From ae58faf90050ea72c289f62c4bc9a5bb8ae7f0bd Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Feb 2014 16:20:09 +0100 Subject: perf/x86/uncore: add PCI ids for SNB/IVB/HSW IMC This patch adds the PCI ids for the Intel SandyBridge, IvyBridge, Haswell Client memory controller (IMC). Cc: mingo@elte.hu Cc: acme@redhat.com Cc: ak@linux.intel.com Cc: zheng.z.yan@intel.com Cc: peterz@infradead.org Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1392132015-14521-4-git-send-email-eranian@google.com Signed-off-by: Thomas Gleixner --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 97fbecdd7a40..7399e6a3e9a0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2531,6 +2531,9 @@ #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 +#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 +#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- cgit v1.2.3 From 0dc83bd30b0bf5410c0933cfbbf8853248eff0a9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2014 11:19:04 +0100 Subject: Revert "writeback: do not sync data dirtied after sync start" This reverts commit c4a391b53a72d2df4ee97f96f78c1d5971b47489. Dave Chinner has reported the commit may cause some inodes to be left out from sync(2). This is because we can call redirty_tail() for some inode (which sets i_dirtied_when to current time) after sync(2) has started or similarly requeue_inode() can set i_dirtied_when to current time if writeback had to skip some pages. The real problem is in the functions clobbering i_dirtied_when but fixing that isn't trivial so revert is a safer choice for now. CC: stable@vger.kernel.org # >= 3.13 Signed-off-by: Jan Kara --- include/linux/writeback.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fc0e4320aa6d..021b8a319b9e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -97,7 +97,7 @@ void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); -void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this); +void sync_inodes_sb(struct super_block *); void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); -- cgit v1.2.3 From 8f47b1871b8aac98f1a9d93bc3467fb97b65199a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2014 20:58:39 +0100 Subject: sched: Add better debug output for might_sleep() might_sleep() can tell us where interrupts have been disabled, but we have no idea what disabled preemption. Add some debug infrastructure. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1391803122-4425-4-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c49a2585ff7d..825ed838d4b9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1463,6 +1463,9 @@ struct task_struct { struct mutex perf_event_mutex; struct list_head perf_event_list; #endif +#ifdef CONFIG_DEBUG_PREEMPT + unsigned long preempt_disable_ip; +#endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; /* Protected by alloc_lock */ short il_next; -- cgit v1.2.3 From c365c292d05908c6ea6f32708f331e21033fe71d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2014 20:58:42 +0100 Subject: sched: Consider pi boosting in setscheduler() If a PI boosted task policy/priority is modified by a setscheduler() call we unconditionally dequeue and requeue the task if it is on the runqueue even if the new priority is lower than the current effective boosted priority. This can result in undesired reordering of the priority bucket list. If the new priority is less or equal than the current effective we just store the new parameters in the task struct and leave the scheduler class and the runqueue untouched. This is handled when the task deboosts itself. Only if the new priority is higher than the effective boosted priority we apply the change immediately. Signed-off-by: Thomas Gleixner [ Rebase ontop of v3.14-rc1. ] Signed-off-by: Sebastian Andrzej Siewior Cc: Dario Faggioli Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1391803122-4425-7-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/sched/rt.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index f7453d4c5613..6341f5be6e24 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -18,6 +18,7 @@ static inline int rt_task(struct task_struct *p) #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); +extern int rt_mutex_check_prio(struct task_struct *task, int newprio); extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) @@ -29,6 +30,12 @@ static inline int rt_mutex_getprio(struct task_struct *p) { return p->normal_prio; } + +static inline int rt_mutex_check_prio(struct task_struct *task, int newprio) +{ + return 0; +} + static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; -- cgit v1.2.3 From 7e298d60f717257dc8365c975f45ff9c37165362 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Tue, 11 Feb 2014 15:34:45 +0800 Subject: sched/prio: Use DEFAULT_PRIO to define NICE_TO_PRIO() and PRIO_TO_NICE() There is already a macro named DEFAULT_PRIO in prio.h, we can use it to define NICE_TO_PRIO and PRIO_TO_NICE rather than use hard coding of (MAX_RT_PRIO + 20). Signed-off-by: Dongsheng Yang Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/4e28ec36fb49e8906027cbbdd900ab26a149905e.1392103744.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/prio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index 410ccb74c9e6..1ceaaa1da3e4 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -25,8 +25,8 @@ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], * and back. */ -#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) -#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) +#define NICE_TO_PRIO(nice) ((nice) + DEFAULT_PRIO) +#define PRIO_TO_NICE(prio) ((prio) - DEFAULT_PRIO) /* * 'User priority' is the nice value converted to something we -- cgit v1.2.3 From 3ee237dddcd885d4e525791299d62de33b2ca117 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Tue, 11 Feb 2014 15:34:46 +0800 Subject: sched/prio: Add 3 macros of MAX_NICE, MIN_NICE and NICE_WIDTH in prio.h Currently there is lots of hard coding to 19 and -20, to represent maximum and minimum of nice values. This patch add three macros in prio.h for maximum, minimum and width of nice value, and uses it to remove hardcoded values in prio.h. Signed-off-by: Dongsheng Yang Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/3994e89327b2b15f992277cdf9f409c516f87d1b.1392103744.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner [ Collapsed two small patches. ] Signed-off-by: Ingo Molnar --- include/linux/sched/prio.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index 1ceaaa1da3e4..ac322583c820 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -1,6 +1,10 @@ #ifndef _SCHED_PRIO_H #define _SCHED_PRIO_H +#define MAX_NICE 19 +#define MIN_NICE -20 +#define NICE_WIDTH (MAX_NICE - MIN_NICE + 1) + /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH @@ -17,8 +21,8 @@ #define MAX_USER_RT_PRIO 100 #define MAX_RT_PRIO MAX_USER_RT_PRIO -#define MAX_PRIO (MAX_RT_PRIO + 40) -#define DEFAULT_PRIO (MAX_RT_PRIO + 20) +#define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) +#define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) /* * Convert user-nice values [ -20 ... 0 ... 19 ] -- cgit v1.2.3 From 156c5887948cd191417f18026aab9ce26e5a95da Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 16:53:31 +0100 Subject: ahci-platform: Add support for devices with more then 1 clock The allwinner-sun4i AHCI controller needs 2 clocks to be enabled and the imx AHCI controller needs 3 clocks to be enabled. tj: Minor comment formatting updates. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- include/linux/ahci_platform.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 73a25005d88a..769d06525320 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -19,6 +19,7 @@ struct device; struct ata_port_info; +struct ahci_host_priv; struct ahci_platform_data { int (*init)(struct device *dev, void __iomem *addr); @@ -30,4 +31,7 @@ struct ahci_platform_data { unsigned int mask_port_map; }; +int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); +void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); + #endif /* _AHCI_PLATFORM_H */ -- cgit v1.2.3 From 96a01ba52c60fdd74dd6e8cf06645d06515b1396 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 16:53:33 +0100 Subject: ahci-platform: Add enable_ / disable_resources helper functions tj: Minor comment formatting updates. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- include/linux/ahci_platform.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 769d06525320..b674b01ce9bc 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -33,5 +33,7 @@ struct ahci_platform_data { int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); +int ahci_platform_enable_resources(struct ahci_host_priv *hpriv); +void ahci_platform_disable_resources(struct ahci_host_priv *hpriv); #endif /* _AHCI_PLATFORM_H */ -- cgit v1.2.3 From 23b07d4cb3c0c850055cf968af44019b8da185fb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 16:53:34 +0100 Subject: ahci-platform: "Library-ise" ahci_probe functionality ahci_probe consists of 3 steps: 1) Get resources (get mmio, clks, regulator) 2) Enable resources, handled by ahci_platform_enable_resouces 3) The more or less standard ahci-host controller init sequence This commit refactors step 1 and 3 into separate functions, so the platform drivers for AHCI implementations which need a specific order in step 2, and / or need to do some custom register poking at some time, can re-use ahci-platform.c code without needing to copy and paste it. Note that ahci_platform_init_host's prototype takes the 3 non function members of ahci_platform_data as arguments, the idea is that drivers using the new exported utility functions will not use ahci_platform_data at all, and hopefully in the future ahci_platform_data can go away entirely. tj: Minor comment formatting updates. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- include/linux/ahci_platform.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index b674b01ce9bc..b80c51c20f76 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -20,7 +20,14 @@ struct device; struct ata_port_info; struct ahci_host_priv; +struct platform_device; +/* + * Note ahci_platform_data is deprecated, it is only kept around for use + * by the old da850 and spear13xx ahci code. + * New drivers should instead declare their own platform_driver struct, and + * use ahci_platform* functions in their own probe, suspend and resume methods. + */ struct ahci_platform_data { int (*init)(struct device *dev, void __iomem *addr); void (*exit)(struct device *dev); @@ -35,5 +42,12 @@ int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); int ahci_platform_enable_resources(struct ahci_host_priv *hpriv); void ahci_platform_disable_resources(struct ahci_host_priv *hpriv); +struct ahci_host_priv *ahci_platform_get_resources( + struct platform_device *pdev); +int ahci_platform_init_host(struct platform_device *pdev, + struct ahci_host_priv *hpriv, + const struct ata_port_info *pi_template, + unsigned int force_port_map, + unsigned int mask_port_map); #endif /* _AHCI_PLATFORM_H */ -- cgit v1.2.3 From 648cb6fd83b97f0f772db783a280af300fa9f2bc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 16:53:35 +0100 Subject: ahci-platform: "Library-ise" suspend / resume functionality Split suspend / resume code into host suspend / resume functionality and resource enable / disabling phases, and export the new suspend_ / resume_host functions. tj: Minor comment formatting updates. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- include/linux/ahci_platform.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index b80c51c20f76..542f2686eb1d 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -50,4 +50,9 @@ int ahci_platform_init_host(struct platform_device *pdev, unsigned int force_port_map, unsigned int mask_port_map); +int ahci_platform_suspend_host(struct device *dev); +int ahci_platform_resume_host(struct device *dev); +int ahci_platform_suspend(struct device *dev); +int ahci_platform_resume(struct device *dev); + #endif /* _AHCI_PLATFORM_H */ -- cgit v1.2.3 From 6ef95e87763fd69889655f4f14e499377a54d066 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 17:22:55 +0100 Subject: ahci_platform: Drop unused ahci_platform_data members These members are not used anywhere, and in the future we want ahci_platform_data to go away entirely so there is no reason to keep these around. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- include/linux/ahci_platform.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 542f2686eb1d..1f16d502600c 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -33,9 +33,6 @@ struct ahci_platform_data { void (*exit)(struct device *dev); int (*suspend)(struct device *dev); int (*resume)(struct device *dev); - const struct ata_port_info *ata_port_info; - unsigned int force_port_map; - unsigned int mask_port_map; }; int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); -- cgit v1.2.3 From 51b1130eb5823ddb90a9ad07d243031d8cb7ecf2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Jan 2014 11:49:39 -0800 Subject: rcutorture: Abstract rcu_torture_random() Because rcu_torture_random() will be used by the locking equivalent to rcutorture, pull it out into its own module. This new module cannot be separately configured, instead, use the Kconfig "select" statement from the Kconfig options of tests depending on it. Suggested-by: Rusty Russell Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/linux/torture.h (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h new file mode 100644 index 000000000000..979e3e6b378a --- /dev/null +++ b/include/linux/torture.h @@ -0,0 +1,47 @@ +/* + * Common functions for in-kernel torture tests. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright IBM Corporation, 2014 + * + * Author: Paul E. McKenney + */ + +#ifndef __LINUX_TORTURE_H +#define __LINUX_TORTURE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct torture_random_state { + unsigned long trs_state; + long trs_count; +}; + +#define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 } + +unsigned long torture_random(struct torture_random_state *trsp); + +#endif /* __LINUX_TORTURE_H */ -- cgit v1.2.3 From 9e2502254132261e0ea8010692fd447b1cedf627 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Jan 2014 16:27:00 -0800 Subject: rcutorture: Abstract torture_param() Create a torture_param() macro and apply it to rcutorture in order to save a few lines of code. This same macro may be applied to other torture frameworks. Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 979e3e6b378a..5aae880b6b4e 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -35,13 +35,18 @@ #include #include +/* Definitions for a non-string torture-test module parameter. */ +#define torture_param(type, name, init, msg) \ + static type name = init; \ + module_param(name, type, 0444); \ + MODULE_PARM_DESC(name, msg); + +/* Low-rider random number generator. */ struct torture_random_state { unsigned long trs_state; long trs_count; }; - #define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 } - unsigned long torture_random(struct torture_random_state *trsp); #endif /* __LINUX_TORTURE_H */ -- cgit v1.2.3 From c2884de38e01134ae040d55aa5644049d1bb850f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 29 Jan 2014 07:30:50 -0800 Subject: rcutorture: Abstract TOROUT_STRING() and friends These diagnostic macros are not confined to torturing RCU, so this commit makes them available to other torture tests. Also removed the do-while from TOROUT_STRING() in response to checkpatch complaints. Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 5aae880b6b4e..09be8887fb08 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -41,6 +41,14 @@ module_param(name, type, 0444); \ MODULE_PARM_DESC(name, msg); +#define TORTURE_FLAG "-torture:" +#define TOROUT_STRING(s) \ + pr_alert("%s" TORTURE_FLAG s "\n", torture_type) +#define VERBOSE_TOROUT_STRING(s) \ + do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) +#define VERBOSE_TOROUT_ERRSTRING(s) \ + do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) + /* Low-rider random number generator. */ struct torture_random_state { unsigned long trs_state; -- cgit v1.2.3 From f67a33561e6e5463b548219df98130da95f2e4a7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 29 Jan 2014 07:40:27 -0800 Subject: rcutorture: Abstract torture_shutdown_absorb() Because handling races between rmmod and normal shutdown is not specific to rcutorture, this commit renames rcutorture_shutdown_absorb() to torture_shutdown_absorb() and pulls it out into then kernel/torture.c module. This implies pulling the fullstop mechanism into kernel/torture.c as well. The exporting of fullstop and fullstop_mutex is ugly and must die. And it does in fact die in later commits that introduce higher-level APIs that encapsulate both of these variables. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett ` --- include/linux/torture.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 09be8887fb08..8474d776c864 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -41,6 +41,18 @@ module_param(name, type, 0444); \ MODULE_PARM_DESC(name, msg); +/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ +#define FULLSTOP_DONTSTOP 0 /* Normal operation. */ +#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ +#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ +extern int fullstop; +/* Protect fullstop transitions and spawning of kthreads. */ +extern struct mutex fullstop_mutex; + +/* Common module parameters. */ +extern char *torture_type; +extern bool verbose; + #define TORTURE_FLAG "-torture:" #define TOROUT_STRING(s) \ pr_alert("%s" TORTURE_FLAG s "\n", torture_type) @@ -57,4 +69,7 @@ struct torture_random_state { #define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 } unsigned long torture_random(struct torture_random_state *trsp); +/* Shutdown task absorption, for when the tasks cannot safely be killed. */ +void torture_shutdown_absorb(const char *title); + #endif /* __LINUX_TORTURE_H */ -- cgit v1.2.3 From 3808dc9fab05913060626d7f0edd0f195cb9dcab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 28 Jan 2014 15:29:21 -0800 Subject: rcutorture: Abstract torture_shuffle() The torture_shuffle() function forces each CPU in turn to go idle periodically in order to check for problems interacting with per-CPU variables and with dyntick-idle mode. Because this sort of debugging is not specific to RCU, this commit abstracts that functionality. This in turn requires abstracting some additional infrastructure. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/torture.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 8474d776c864..544a2c33c1ae 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -69,6 +69,11 @@ struct torture_random_state { #define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 } unsigned long torture_random(struct torture_random_state *trsp); +/* Task shuffler, which causes CPUs to occasionally go idle. */ +void torture_shuffle_task_register(struct task_struct *tp); +int torture_shuffle_init(long shuffint); +void torture_shuffle_cleanup(void); + /* Shutdown task absorption, for when the tasks cannot safely be killed. */ void torture_shutdown_absorb(const char *title); -- cgit v1.2.3 From 2e9e8081d2e7a4efb582a240aa7fee991bbbabb0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 28 Jan 2014 15:58:22 -0800 Subject: rcutorture: Abstract torture_onoff() Because online/offline torturing is not specific to RCU, this commit abstracts it into the kernel/torture.c module to allow other torture tests to use it. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/torture.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 544a2c33c1ae..be3694a702e9 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -61,6 +61,18 @@ extern bool verbose; #define VERBOSE_TOROUT_ERRSTRING(s) \ do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) +/* Definitions for a non-string torture-test module parameter. */ +#define torture_parm(type, name, init, msg) \ + static type name = init; \ + module_param(name, type, 0444); \ + MODULE_PARM_DESC(name, msg); + +/* Definitions for online/offline exerciser. */ +int torture_onoff_init(long ooholdoff, long oointerval); +void torture_onoff_cleanup(void); +char *torture_onoff_stats(char *page); +bool torture_onoff_failures(void); + /* Low-rider random number generator. */ struct torture_random_state { unsigned long trs_state; -- cgit v1.2.3 From b5daa8f3b3b2b0133ad40e13d4f722070119ce36 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Jan 2014 13:38:09 -0800 Subject: rcutorture: Abstract torture-test initialization This commit creates torture_init_begin() and torture_init_end() functions to abstract locking and allow the torture_type and verbose variables in kernel/torture.o to become static. With a bit more abstraction, fullstop_mutex will also become static. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/torture.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index be3694a702e9..428d2712be04 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -49,10 +49,6 @@ extern int fullstop; /* Protect fullstop transitions and spawning of kthreads. */ extern struct mutex fullstop_mutex; -/* Common module parameters. */ -extern char *torture_type; -extern bool verbose; - #define TORTURE_FLAG "-torture:" #define TOROUT_STRING(s) \ pr_alert("%s" TORTURE_FLAG s "\n", torture_type) @@ -89,4 +85,9 @@ void torture_shuffle_cleanup(void); /* Shutdown task absorption, for when the tasks cannot safely be killed. */ void torture_shutdown_absorb(const char *title); +/* Initialization and cleanup. */ + +void torture_init_begin(char *ttype, bool v); +void torture_init_end(void); + #endif /* __LINUX_TORTURE_H */ -- cgit v1.2.3 From cc47ae0830264f07442070b36fe0d0a4d4e3c313 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Jan 2014 14:21:11 -0800 Subject: rcutorture: Abstract torture-test cleanup This commit creates a torture_cleanup() that handles the generic cleanup actions local to kernel/torture.c. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/torture.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 428d2712be04..31d69930e5cf 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -65,7 +65,6 @@ extern struct mutex fullstop_mutex; /* Definitions for online/offline exerciser. */ int torture_onoff_init(long ooholdoff, long oointerval); -void torture_onoff_cleanup(void); char *torture_onoff_stats(char *page); bool torture_onoff_failures(void); @@ -80,14 +79,13 @@ unsigned long torture_random(struct torture_random_state *trsp); /* Task shuffler, which causes CPUs to occasionally go idle. */ void torture_shuffle_task_register(struct task_struct *tp); int torture_shuffle_init(long shuffint); -void torture_shuffle_cleanup(void); /* Shutdown task absorption, for when the tasks cannot safely be killed. */ void torture_shutdown_absorb(const char *title); /* Initialization and cleanup. */ - void torture_init_begin(char *ttype, bool v); void torture_init_end(void); +bool torture_cleanup(void); #endif /* __LINUX_TORTURE_H */ -- cgit v1.2.3 From 4622b487ecf0094401ac10e504606e5cbdea5a6e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Jan 2014 15:37:19 -0800 Subject: rcutorture: Abstract torture_shutdown_notify() Because handling the race between rmmod and system shutdown is not specific to RCU, this commit abstracts torture_shutdown_notify(), placing this code into kernel/torture.c. This change also allows fullstop_mutex to be private to kernel/torture.c. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/torture.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 31d69930e5cf..742d8a402f19 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -46,8 +46,6 @@ #define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ #define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ extern int fullstop; -/* Protect fullstop transitions and spawning of kthreads. */ -extern struct mutex fullstop_mutex; #define TORTURE_FLAG "-torture:" #define TOROUT_STRING(s) \ -- cgit v1.2.3 From 36970bb91d89618d3495babf44b934e9c9db6bbc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Jan 2014 15:49:29 -0800 Subject: rcutorture: Privatize fullstop This commit introduces the torture_must_stop() function in order to keep use of the fullstop variable local to kernel/torture.c. There is also a torture_must_stop_irq() counterpart for use from RCU callbacks, timeout handlers, and the like. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/torture.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 742d8a402f19..0259db38bfb0 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -41,12 +41,6 @@ module_param(name, type, 0444); \ MODULE_PARM_DESC(name, msg); -/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ -#define FULLSTOP_DONTSTOP 0 /* Normal operation. */ -#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ -#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ -extern int fullstop; - #define TORTURE_FLAG "-torture:" #define TOROUT_STRING(s) \ pr_alert("%s" TORTURE_FLAG s "\n", torture_type) @@ -85,5 +79,7 @@ void torture_shutdown_absorb(const char *title); void torture_init_begin(char *ttype, bool v); void torture_init_end(void); bool torture_cleanup(void); +bool torture_must_stop(void); +bool torture_must_stop_irq(void); #endif /* __LINUX_TORTURE_H */ -- cgit v1.2.3 From 628edaa5062282b6e3d76c886fd2cbccae5cb87b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jan 2014 11:57:43 -0800 Subject: rcutorture: Abstract stutter_wait() Because stuttering the test load (stopping and restarting it) is useful for non-RCU testing, this commit moves the load-stuttering functionality to kernel/torture.c. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/torture.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 0259db38bfb0..203f127d9ddf 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -75,8 +75,13 @@ int torture_shuffle_init(long shuffint); /* Shutdown task absorption, for when the tasks cannot safely be killed. */ void torture_shutdown_absorb(const char *title); +/* Task stuttering, which forces load/no-load transitions. */ +void stutter_wait(const char *title); +int torture_stutter_init(int s); +void torture_stutter_cleanup(void); + /* Initialization and cleanup. */ -void torture_init_begin(char *ttype, bool v); +void torture_init_begin(char *ttype, bool v, int *runnable); void torture_init_end(void); bool torture_cleanup(void); bool torture_must_stop(void); -- cgit v1.2.3 From e991dbc0770b01b7dc7d6d7660442e83ebd11828 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jan 2014 14:52:13 -0800 Subject: rcutorture: Abstract torture_shutdown() Because auto-shutdown of torture testing is not specific to RCU, this commit moves the auto-shutdown function to kernel/torture.c. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/torture.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 203f127d9ddf..c79c41d543ef 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -72,8 +72,10 @@ unsigned long torture_random(struct torture_random_state *trsp); void torture_shuffle_task_register(struct task_struct *tp); int torture_shuffle_init(long shuffint); -/* Shutdown task absorption, for when the tasks cannot safely be killed. */ +/* Test auto-shutdown handling. */ void torture_shutdown_absorb(const char *title); +int torture_shutdown_init(int ssecs, void (*cleanup)(void)); +void torture_shutdown_cleanup(void); /* Task stuttering, which forces load/no-load transitions. */ void stutter_wait(const char *title); -- cgit v1.2.3 From 7fafaac5b9ce22cc57777865390520476ad2262d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jan 2014 17:37:28 -0800 Subject: rcutorture: Fix rcutorture shutdown races Not all of the rcutorture kthreads waited for kthread_should_stop() before returning from their top-level functions, and none of them used torture_shutdown_absorb() properly. These problems can result in segfaults and hangs at shutdown time, and some recent changes perturbed timing sufficiently to make them much more probable. This commit therefore creates a torture_kthread_stopping() function that does the proper kthread shutdown dance in one centralized location. Accommodate this grouping by making VERBOSE_TOROUT_STRING() capable of taking a non-const string as its argument, which allows the new torture_kthread_stopping() to pass its "title" argument directly to the updated version of VERBOSE_TOROUT_STRING(). Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index c79c41d543ef..2ea11094daf6 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -45,7 +45,7 @@ #define TOROUT_STRING(s) \ pr_alert("%s" TORTURE_FLAG s "\n", torture_type) #define VERBOSE_TOROUT_STRING(s) \ - do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) + do { if (verbose) pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s); } while (0) #define VERBOSE_TOROUT_ERRSTRING(s) \ do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) @@ -88,5 +88,6 @@ void torture_init_end(void); bool torture_cleanup(void); bool torture_must_stop(void); bool torture_must_stop_irq(void); +void torture_kthread_stopping(char *title); #endif /* __LINUX_TORTURE_H */ -- cgit v1.2.3 From 47cf29b9e721967aac95ebda9e50408219755852 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Feb 2014 11:52:27 -0800 Subject: rcutorture: Abstract torture_create_kthread() Creation of kthreads is not RCU-specific, so this commit abstracts out torture_create_kthread(), saving a few tens of lines of code in the process. This change requires modifying VERBOSE_TOROUT_ERRSTRING() to take a non-const string, so that _torture_create_kthread() can avoid an open-coded substitute. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/torture.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 2ea11094daf6..430cc3008628 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -47,7 +47,7 @@ #define VERBOSE_TOROUT_STRING(s) \ do { if (verbose) pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s); } while (0) #define VERBOSE_TOROUT_ERRSTRING(s) \ - do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) + do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s); } while (0) /* Definitions for a non-string torture-test module parameter. */ #define torture_parm(type, name, init, msg) \ @@ -89,5 +89,11 @@ bool torture_cleanup(void); bool torture_must_stop(void); bool torture_must_stop_irq(void); void torture_kthread_stopping(char *title); +int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, + char *f, struct task_struct **tp); + +#define torture_create_kthread(n, arg, tp) \ + _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \ + "Failed to create " #n, &(tp)) #endif /* __LINUX_TORTURE_H */ -- cgit v1.2.3 From 9c029b86098decd4660eec511b8d2d42da3e7dd9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 4 Feb 2014 11:47:08 -0800 Subject: rcutorture: Abstract torture_stop_kthread() Stopping of kthreads is not RCU-specific, so this commit abstracts out torture_stop_kthread(), saving a few lines of code in the process. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/torture.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 430cc3008628..7ccfb0a16728 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -91,9 +91,12 @@ bool torture_must_stop_irq(void); void torture_kthread_stopping(char *title); int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, char *f, struct task_struct **tp); +void _torture_stop_kthread(char *m, struct task_struct **tp); #define torture_create_kthread(n, arg, tp) \ _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \ "Failed to create " #n, &(tp)) +#define torture_stop_kthread(n, tp) \ + _torture_stop_kthread("Stopping " #n " task", &(tp)) #endif /* __LINUX_TORTURE_H */ -- cgit v1.2.3 From bfefc73aa1d1bad317bccef8a15da39263d3d962 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 4 Feb 2014 12:35:27 -0800 Subject: rcutorture: Stop generic kthreads in torture_cleanup() The specific torture modules (like rcutorture) need to call torture_cleanup() in any case, so this commit makes torture_cleanup() deal with torture_shutdown_cleanup() and torture_stutter_cleanup() so that the specific modules don't have to deal with these details. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/torture.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 7ccfb0a16728..b2e2b468e511 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -75,12 +75,10 @@ int torture_shuffle_init(long shuffint); /* Test auto-shutdown handling. */ void torture_shutdown_absorb(const char *title); int torture_shutdown_init(int ssecs, void (*cleanup)(void)); -void torture_shutdown_cleanup(void); /* Task stuttering, which forces load/no-load transitions. */ void stutter_wait(const char *title); int torture_stutter_init(int s); -void torture_stutter_cleanup(void); /* Initialization and cleanup. */ void torture_init_begin(char *ttype, bool v, int *runnable); -- cgit v1.2.3 From ff57cd5863cf3014c1c5ed62ce2715294f065b17 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2014 19:14:11 +0100 Subject: fsnotify: Allocate overflow events with proper type Commit 7053aee26a35 "fsnotify: do not share events between notification groups" used overflow event statically allocated in a group with the size of the generic notification event. This causes problems because some code looks at type specific parts of event structure and gets confused by a random data it sees there and causes crashes. Fix the problem by allocating overflow event with type corresponding to the group type so code cannot get confused. Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c84bc7c2bfc8..64cf3ef50696 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -160,7 +160,7 @@ struct fsnotify_group { struct fasync_struct *fsn_fa; /* async notification */ - struct fsnotify_event overflow_event; /* Event we queue when the + struct fsnotify_event *overflow_event; /* Event we queue when the * notification list is too * full */ -- cgit v1.2.3 From fed95bab8d29b928fcf6225be72d37ded452e8a2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 25 Feb 2014 19:28:44 +0800 Subject: sysfs: fix namespace refcnt leak As mount() and kill_sb() is not a one-to-one match, we shoudn't get ns refcnt unconditionally in sysfs_mount(), and instead we should get the refcnt only when kernfs_mount() allocated a new superblock. v2: - Changed the name of the new argument, suggested by Tejun. - Made the argument optional, suggested by Tejun. v3: - Make the new argument as second-to-last arg, suggested by Tejun. Signed-off-by: Li Zefan Acked-by: Tejun Heo --- fs/kernfs/mount.c | 8 +++++++- fs/sysfs/mount.c | 5 +++-- include/linux/kernfs.h | 9 +++++---- 3 files changed, 15 insertions(+), 7 deletions(-) Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5be9f0228a3b..d267623c28cf 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -249,7 +249,8 @@ void kernfs_notify(struct kernfs_node *kn); const void *kernfs_super_ns(struct super_block *sb); struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, const void *ns); + struct kernfs_root *root, bool *new_sb_created, + const void *ns); void kernfs_kill_sb(struct super_block *sb); void kernfs_init(void); @@ -317,7 +318,7 @@ static inline const void *kernfs_super_ns(struct super_block *sb) static inline struct dentry * kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, const void *ns) + struct kernfs_root *root, bool *new_sb_created, const void *ns) { return ERR_PTR(-ENOSYS); } static inline void kernfs_kill_sb(struct super_block *sb) { } @@ -368,9 +369,9 @@ static inline int kernfs_remove_by_name(struct kernfs_node *parent, static inline struct dentry * kernfs_mount(struct file_system_type *fs_type, int flags, - struct kernfs_root *root) + struct kernfs_root *root, bool *new_sb_created) { - return kernfs_mount_ns(fs_type, flags, root, NULL); + return kernfs_mount_ns(fs_type, flags, root, new_sb_created, NULL); } #endif /* __LINUX_KERNFS_H */ -- cgit v1.2.3 From f3713fd9cff733d9df83116422d8e4af6e86b2bb Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 25 Feb 2014 15:01:45 -0800 Subject: ipc,mqueue: remove limits for the amount of system-wide queues Commit 93e6f119c0ce ("ipc/mqueue: cleanup definition names and locations") added global hardcoded limits to the amount of message queues that can be created. While these limits are per-namespace, reality is that it ends up breaking userspace applications. Historically users have, at least in theory, been able to create up to INT_MAX queues, and limiting it to just 1024 is way too low and dramatic for some workloads and use cases. For instance, Madars reports: "This update imposes bad limits on our multi-process application. As our app uses approaches that each process opens its own set of queues (usually something about 3-5 queues per process). In some scenarios we might run up to 3000 processes or more (which of-course for linux is not a problem). Thus we might need up to 9000 queues or more. All processes run under one user." Other affected users can be found in launchpad bug #1155695: https://bugs.launchpad.net/ubuntu/+source/manpages/+bug/1155695 Instead of increasing this limit, revert it entirely and fallback to the original way of dealing queue limits -- where once a user's resource limit is reached, and all memory is used, new queues cannot be created. Signed-off-by: Davidlohr Bueso Reported-by: Madars Vitolins Acked-by: Doug Ledford Cc: Manfred Spraul Cc: [3.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e7831d203737..35e7eca4e33b 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -118,9 +118,7 @@ extern int mq_init_ns(struct ipc_namespace *ns); * the new maximum will handle anyone else. I may have to revisit this * in the future. */ -#define MIN_QUEUESMAX 1 #define DFLT_QUEUESMAX 256 -#define HARD_QUEUESMAX 1024 #define MIN_MSGMAX 1 #define DFLT_MSG 10U #define DFLT_MSGMAX 10 -- cgit v1.2.3 From 7a754743185a4b05818e10058fa2fbe4e6969085 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 11 Feb 2014 16:10:12 -0500 Subject: rcu: Fix sparse warning for rcu_expedited from kernel/ksysfs.c This commit fixes the follwoing warning: kernel/ksysfs.c:143:5: warning: symbol 'rcu_expedited' was not declared. Should it be static? Signed-off-by: Paul Gortmaker [ paulmck: Moved the declaration to include/linux/rcupdate.h to avoid including the RCU-internal rcu.h file outside of RCU. ] Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 32decf1a9c6c..f3706c6b2e21 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,6 +46,7 @@ #include #include +extern int rcu_expedited; /* for sysctl */ #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ -- cgit v1.2.3 From f207dbe63c61158c234e2e8929a3725a7f6b2b9b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 27 Feb 2014 12:20:31 +0100 Subject: Revert "sched/wait: Suppress Sparse 'variable shadowing' warning" This reverts commit 980f88e414418bf65569a3b62b08b07e6fc2f4c6. This warning is actually useful, don't suppress it. We actually rely on the shadowing for ___wait_cond_timeout(). We further used the __ret variable in __wait_event_timeout()'s cmd argument: __ret = schedule_timeout(__ret). That now explicitly uses the wrong __ret. Reported-by: Gregory CLEMENT Requested-by: Andrew Morton Requested-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-Q5blhuqqzwgVwvjf1gszrdol@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index c55ea5c24404..559044c79232 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -195,7 +195,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); ({ \ __label__ __out; \ wait_queue_t __wait; \ - long ___ret = ret; \ + long __ret = ret; \ \ INIT_LIST_HEAD(&__wait.task_list); \ if (exclusive) \ @@ -210,7 +210,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); break; \ \ if (___wait_is_interruptible(state) && __int) { \ - ___ret = __int; \ + __ret = __int; \ if (exclusive) { \ abort_exclusive_wait(&wq, &__wait, \ state, NULL); \ @@ -222,7 +222,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); cmd; \ } \ finish_wait(&wq, &__wait); \ -__out: ___ret; \ +__out: __ret; \ }) #define __wait_event(wq, condition) \ -- cgit v1.2.3 From 6f285b19d09f72e801525f5eea1bdad22e559bf0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Feb 2014 19:44:55 -0800 Subject: audit: Send replies in the proper network namespace. In perverse cases of file descriptor passing the current network namespace of a process and the network namespace of a socket used by that socket may differ. Therefore use the network namespace of the appropiate socket to ensure replies always go to the appropiate socket. Signed-off-by: "Eric W. Biederman" --- include/linux/audit.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index aa865a9a4c4f..ec1464df4c60 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -43,6 +43,7 @@ struct mq_attr; struct mqstat; struct audit_watch; struct audit_tree; +struct sk_buff; struct audit_krule { int vers_ops; @@ -463,7 +464,7 @@ extern int audit_filter_user(int type); extern int audit_filter_type(int type); extern int audit_rule_change(int type, __u32 portid, int seq, void *data, size_t datasz); -extern int audit_list_rules_send(__u32 portid, int seq); +extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); extern u32 audit_enabled; #else /* CONFIG_AUDIT */ -- cgit v1.2.3 From b7e63a1079b266866a732cf699d8c4d61391bbda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 26 Feb 2014 11:19:14 -0800 Subject: NFSv4: Fix another nfs4_sequence corruptor nfs4_release_lockowner needs to set the rpc_message reply to point to the nfs4_sequence_res in order to avoid another Oopsable situation in nfs41_assign_slot. Fixes: fbd4bfd1d9d21 (NFS: Add nfs4_sequence calls for RELEASE_LOCKOWNER) Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b2fb167b2e6d..5624e4e2763c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -467,9 +467,14 @@ struct nfs_lockt_res { }; struct nfs_release_lockowner_args { + struct nfs4_sequence_args seq_args; struct nfs_lowner lock_owner; }; +struct nfs_release_lockowner_res { + struct nfs4_sequence_res seq_res; +}; + struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; -- cgit v1.2.3 From 03b8c7b623c80af264c4c8d6111e5c6289933666 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 2 Mar 2014 13:09:47 +0100 Subject: futex: Allow architectures to skip futex_atomic_cmpxchg_inatomic() test If an architecture has futex_atomic_cmpxchg_inatomic() implemented and there is no runtime check necessary, allow to skip the test within futex_init(). This allows to get rid of some code which would always give the same result, and also allows the compiler to optimize a couple of if statements away. Signed-off-by: Heiko Carstens Cc: Finn Thain Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/20140302120947.GA3641@osiris Signed-off-by: Thomas Gleixner --- include/linux/futex.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index b0d95cac826e..6435f46d6e13 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -55,7 +55,11 @@ union futex_key { #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); extern void exit_pi_state_list(struct task_struct *curr); +#ifdef CONFIG_HAVE_FUTEX_CMPXCHG +#define futex_cmpxchg_enabled 1 +#else extern int futex_cmpxchg_enabled; +#endif #else static inline void exit_robust_list(struct task_struct *curr) { -- cgit v1.2.3 From 45ab2813d40d88fc575e753c38478de242d03f88 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 26 Feb 2014 13:37:38 -0500 Subject: tracing: Do not add event files for modules that fail tracepoints If a module fails to add its tracepoints due to module tainting, do not create the module event infrastructure in the debugfs directory. As the events will not work and worse yet, they will silently fail, making the user wonder why the events they enable do not display anything. Having a warning on module load and the events not visible to the users will make the cause of the problem much clearer. Link: http://lkml.kernel.org/r/20140227154923.265882695@goodmis.org Fixes: 6d723736e472 "tracing/events: add support for modules to TRACE_EVENT" Acked-by: Mathieu Desnoyers Cc: stable@vger.kernel.org # 2.6.31+ Cc: Rusty Russell Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index accc497f8d72..7159a0a933df 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -60,6 +60,12 @@ struct tp_module { unsigned int num_tracepoints; struct tracepoint * const *tracepoints_ptrs; }; +bool trace_module_has_bad_taint(struct module *mod); +#else +static inline bool trace_module_has_bad_taint(struct module *mod) +{ + return false; +} #endif /* CONFIG_MODULES */ struct tracepoint_iter { -- cgit v1.2.3 From 0473c9b5f05948df780bbc7b996dd7aefc4ec41d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 3 Mar 2014 10:44:03 +0100 Subject: compat: let architectures define __ARCH_WANT_COMPAT_SYS_GETDENTS64 For architecture dependent compat syscalls in common code an architecture must define something like __ARCH_WANT_ if it wants to use the code. This however is not true for compat_sys_getdents64 for which architectures must define __ARCH_OMIT_COMPAT_SYS_GETDENTS64 if they do not want the code. This leads to the situation where all architectures, except mips, get the compat code but only x86_64, arm64 and the generic syscall architectures actually use it. So invert the logic, so that architectures actively must do something to get the compat code. This way a couple of architectures get rid of otherwise dead code. Signed-off-by: Heiko Carstens --- include/linux/compat.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 3f448c65511b..beded18f992d 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -502,9 +502,11 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count); +#ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 asmlinkage long compat_sys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, unsigned int count); +#endif asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, unsigned int nr_segs, unsigned int flags); asmlinkage long compat_sys_open(const char __user *filename, int flags, -- cgit v1.2.3 From 217f4433fc2fe768a7f13f7e5586333bb8280e9e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 26 Feb 2014 10:56:20 +0100 Subject: compat: add COMPAT_SYSCALL_DEFINE0 macro For consistency reason add a COMPAT_SYSCALL_DEFINE0 macro. This macro should be used for compat system calls with zero parameters. Signed-off-by: Heiko Carstens --- include/linux/compat.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index beded18f992d..4c42df3fce37 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -27,6 +27,9 @@ #define __SC_DELOUSE(t,v) ((t)(unsigned long)(v)) #endif +#define COMPAT_SYSCALL_DEFINE0(name) \ + asmlinkage long compat_sys_##name(void) + #define COMPAT_SYSCALL_DEFINE1(name, ...) \ COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) #define COMPAT_SYSCALL_DEFINE2(name, ...) \ -- cgit v1.2.3 From ab4f8bba19323eb78b7473df42b225eb14090fcc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 1 Mar 2014 13:45:03 +0100 Subject: s390/compat: automatic zero, sign and pointer conversion of syscalls Instead of explicitly changing compat system call parameters from e.g. unsigned long to compat_ulong_t let the COMPAT_SYSCALL_WRAP macros automatically detect (unsigned) long parameters and zero and sign extend them automatically. The resulting binary is completely identical. In addition add a sys_[system call name] prototype for each system call wrapper. This will cause compile errors if the prototype does not match the prototype in include/linux/syscall.h. Therefore we should now always get the correct zero and sign extension of system call parameters. Pointers are handled like before. Signed-off-by: Heiko Carstens --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a747a77ea584..1e67b7a5968c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -98,6 +98,8 @@ struct sigaltstack; #define __MAP(n,...) __MAP##n(__VA_ARGS__) #define __SC_DECL(t, a) t a +#define __TYPE_IS_L(t) (__same_type((t)0, 0L)) +#define __TYPE_IS_UL(t) (__same_type((t)0, 0UL)) #define __TYPE_IS_LL(t) (__same_type((t)0, 0LL) || __same_type((t)0, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (t) a -- cgit v1.2.3 From 668f9abbd4334e6c29fa8acd71635c4f9101caa7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 3 Mar 2014 15:38:18 -0800 Subject: mm: close PageTail race Commit bf6bddf1924e ("mm: introduce compaction and migration for ballooned pages") introduces page_count(page) into memory compaction which dereferences page->first_page if PageTail(page). This results in a very rare NULL pointer dereference on the aforementioned page_count(page). Indeed, anything that does compound_head(), including page_count() is susceptible to racing with prep_compound_page() and seeing a NULL or dangling page->first_page pointer. This patch uses Andrea's implementation of compound_trans_head() that deals with such a race and makes it the default compound_head() implementation. This includes a read memory barrier that ensures that if PageTail(head) is true that we return a head page that is neither NULL nor dangling. The patch then adds a store memory barrier to prep_compound_page() to ensure page->first_page is set. This is the safest way to ensure we see the head page that we are expecting, PageTail(page) is already in the unlikely() path and the memory barriers are unfortunately required. Hugetlbfs is the exception, we don't enforce a store memory barrier during init since no race is possible. Signed-off-by: David Rientjes Cc: Holger Kiehl Cc: Christoph Lameter Cc: Rafael Aquini Cc: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Rik van Riel Cc: "Kirill A. Shutemov" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 41 ----------------------------------------- include/linux/mm.h | 14 ++++++++++++-- 2 files changed, 12 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index db512014e061..b826239bdce0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -157,46 +157,6 @@ static inline int hpage_nr_pages(struct page *page) return HPAGE_PMD_NR; return 1; } -/* - * compound_trans_head() should be used instead of compound_head(), - * whenever the "page" passed as parameter could be the tail of a - * transparent hugepage that could be undergoing a - * __split_huge_page_refcount(). The page structure layout often - * changes across releases and it makes extensive use of unions. So if - * the page structure layout will change in a way that - * page->first_page gets clobbered by __split_huge_page_refcount, the - * implementation making use of smp_rmb() will be required. - * - * Currently we define compound_trans_head as compound_head, because - * page->private is in the same union with page->first_page, and - * page->private isn't clobbered. However this also means we're - * currently leaving dirt into the page->private field of anonymous - * pages resulting from a THP split, instead of setting page->private - * to zero like for every other page that has PG_private not set. But - * anonymous pages don't use page->private so this is not a problem. - */ -#if 0 -/* This will be needed if page->private will be clobbered in split_huge_page */ -static inline struct page *compound_trans_head(struct page *page) -{ - if (PageTail(page)) { - struct page *head; - head = page->first_page; - smp_rmb(); - /* - * head may be a dangling pointer. - * __split_huge_page_refcount clears PageTail before - * overwriting first_page, so if PageTail is still - * there it means the head pointer isn't dangling. - */ - if (PageTail(page)) - return head; - } - return page; -} -#else -#define compound_trans_head(page) compound_head(page) -#endif extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp); @@ -226,7 +186,6 @@ static inline int split_huge_page(struct page *page) do { } while (0) #define split_huge_page_pmd_mm(__mm, __address, __pmd) \ do { } while (0) -#define compound_trans_head(page) compound_head(page) static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { diff --git a/include/linux/mm.h b/include/linux/mm.h index f28f46eade6a..03ab3e58f511 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -399,8 +399,18 @@ static inline void compound_unlock_irqrestore(struct page *page, static inline struct page *compound_head(struct page *page) { - if (unlikely(PageTail(page))) - return page->first_page; + if (unlikely(PageTail(page))) { + struct page *head = page->first_page; + + /* + * page->first_page may be a dangling pointer to an old + * compound page, so recheck that it is still a tail + * page before returning. + */ + smp_rmb(); + if (likely(PageTail(page))) + return head; + } return page; } -- cgit v1.2.3 From 9050d7eba40b3d79551668f54e68fd6f51945ef3 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 3 Mar 2014 15:38:27 -0800 Subject: mm: include VM_MIXEDMAP flag in the VM_SPECIAL list to avoid m(un)locking Daniel Borkmann reported a VM_BUG_ON assertion failing: ------------[ cut here ]------------ kernel BUG at mm/mlock.c:528! invalid opcode: 0000 [#1] SMP Modules linked in: ccm arc4 iwldvm [...] video CPU: 3 PID: 2266 Comm: netsniff-ng Not tainted 3.14.0-rc2+ #8 Hardware name: LENOVO 2429BP3/2429BP3, BIOS G4ET37WW (1.12 ) 05/29/2012 task: ffff8801f87f9820 ti: ffff88002cb44000 task.ti: ffff88002cb44000 RIP: 0010:[] [] munlock_vma_pages_range+0x2e0/0x2f0 Call Trace: do_munmap+0x18f/0x3b0 vm_munmap+0x41/0x60 SyS_munmap+0x22/0x30 system_call_fastpath+0x1a/0x1f RIP munlock_vma_pages_range+0x2e0/0x2f0 ---[ end trace a0088dcf07ae10f2 ]--- because munlock_vma_pages_range() thinks it's unexpectedly in the middle of a THP page. This can be reproduced with default config since 3.11 kernels. A reproducer can be found in the kernel's selftest directory for networking by running ./psock_tpacket. The problem is that an order=2 compound page (allocated by alloc_one_pg_vec_page() is part of the munlocked VM_MIXEDMAP vma (mapped by packet_mmap()) and mistaken for a THP page and assumed to be order=9. The checks for THP in munlock came with commit ff6a6da60b89 ("mm: accelerate munlock() treatment of THP pages"), i.e. since 3.9, but did not trigger a bug. It just makes munlock_vma_pages_range() skip such compound pages until the next 512-pages-aligned page, when it encounters a head page. This is however not a problem for vma's where mlocking has no effect anyway, but it can distort the accounting. Since commit 7225522bb429 ("mm: munlock: batch non-THP page isolation and munlock+putback using pagevec") this can trigger a VM_BUG_ON in PageTransHuge() check. This patch fixes the issue by adding VM_MIXEDMAP flag to VM_SPECIAL, a list of flags that make vma's non-mlockable and non-mergeable. The reasoning is that VM_MIXEDMAP vma's are similar to VM_PFNMAP, which is already on the VM_SPECIAL list, and both are intended for non-LRU pages where mlocking makes no sense anyway. Related Lkml discussion can be found in [2]. [1] tools/testing/selftests/net/psock_tpacket [2] https://lkml.org/lkml/2014/1/10/427 Signed-off-by: Vlastimil Babka Signed-off-by: Daniel Borkmann Reported-by: Daniel Borkmann Tested-by: Daniel Borkmann Cc: Thomas Hellstrom Cc: John David Anglin Cc: HATAYAMA Daisuke Cc: Konstantin Khlebnikov Cc: Carsten Otte Cc: Jared Hulbert Tested-by: Hannes Frederic Sowa Cc: Kirill A. Shutemov Acked-by: Rik van Riel Cc: Andrea Arcangeli Cc: [3.11.x+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 03ab3e58f511..a1fe25110f50 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -175,7 +175,7 @@ extern unsigned int kobjsize(const void *objp); * Special vmas that are non-mergable, non-mlock()able. * Note: mm/huge_memory.c VM_NO_THP depends on this definition. */ -#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP) +#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) /* * mapping from the currently active vm_flags protection bits (the -- cgit v1.2.3 From 1ae71d03194ea7424cbd14e449581f67c463d20d Mon Sep 17 00:00:00 2001 From: Liu Ping Fan Date: Mon, 3 Mar 2014 15:38:39 -0800 Subject: mm: numa: bugfix for LAST_CPUPID_NOT_IN_PAGE_FLAGS When doing some numa tests on powerpc, I triggered an oops bug. I find it is caused by using page->_last_cpupid. It should be initialized as "-1 & LAST_CPUPID_MASK", but not "-1". Otherwise, in task_numa_fault(), we will miss the checking (last_cpupid == (-1 & LAST_CPUPID_MASK)). And finally cause an oops bug in task_numa_group(), since the online cpu is less than possible cpu. This happen with CONFIG_SPARSE_VMEMMAP disabled Call trace: SMP NR_CPUS=64 NUMA PowerNV Modules linked in: CPU: 24 PID: 804 Comm: systemd-udevd Not tainted3.13.0-rc1+ #32 task: c000001e2746aa80 ti: c000001e32c50000 task.ti:c000001e32c50000 REGS: c000001e32c53510 TRAP: 0300 Not tainted(3.13.0-rc1+) MSR: 9000000000009032 CR:28024424 XER: 20000000 CFAR: c000000000009324 DAR: 7265717569726857 DSISR:40000000 SOFTE: 1 NIP .task_numa_fault+0x1470/0x2370 LR .task_numa_fault+0x1468/0x2370 Call Trace: .task_numa_fault+0x1468/0x2370 (unreliable) .do_numa_page+0x480/0x4a0 .handle_mm_fault+0x4ec/0xc90 .do_page_fault+0x3a8/0x890 handle_page_fault+0x10/0x30 Instruction dump: 3c82fefb 3884b138 48d9cff1 60000000 48000574 3c62fefb3863af78 3c82fefb 3884b138 48d9cfd5 60000000 e93f0100 <812902e4> 7d2907b45529063e 7d2a07b4 ---[ end trace 15f2510da5ae07cf ]--- Signed-off-by: Liu Ping Fan Signed-off-by: Aneesh Kumar K.V Acked-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a1fe25110f50..c1b7414c7bef 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -767,7 +767,7 @@ static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS static inline int page_cpupid_xchg_last(struct page *page, int cpupid) { - return xchg(&page->_last_cpupid, cpupid); + return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK); } static inline int page_cpupid_last(struct page *page) @@ -776,7 +776,7 @@ static inline int page_cpupid_last(struct page *page) } static inline void page_cpupid_reset_last(struct page *page) { - page->_last_cpupid = -1; + page->_last_cpupid = -1 & LAST_CPUPID_MASK; } #else static inline int page_cpupid_last(struct page *page) -- cgit v1.2.3 From 3e909599215456928e6b42a04f11c2517881570b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 15 Jan 2014 13:21:22 +0000 Subject: efi: Move facility flags to struct efi As we grow support for more EFI architectures they're going to want the ability to query which EFI features are available on the running system. Instead of storing this information in an architecture-specific place, stick it in the global 'struct efi', which is already the central location for EFI state. While we're at it, let's change the return value of efi_enabled() to be bool and replace all references to 'facility' with 'feature', which is the usual word used to describe the attributes of the running system. Signed-off-by: Matt Fleming --- include/linux/efi.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 0a819e7a60c9..214833b4a97d 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -573,6 +573,7 @@ extern struct efi { efi_reset_system_t *reset_system; efi_set_virtual_address_map_t *set_virtual_address_map; struct efi_memory_map *memmap; + unsigned long flags; } efi; static inline int @@ -660,17 +661,24 @@ extern int __init efi_setup_pcdp_console(char *); #ifdef CONFIG_EFI # ifdef CONFIG_X86 -extern int efi_enabled(int facility); + +/* + * Test whether the above EFI_* bits are enabled. + */ +static inline bool efi_enabled(int feature) +{ + return test_bit(feature, &efi.flags) != 0; +} # else -static inline int efi_enabled(int facility) +static inline bool efi_enabled(int feature) { - return 1; + return true; } # endif #else -static inline int efi_enabled(int facility) +static inline bool efi_enabled(int feature) { - return 0; + return false; } #endif -- cgit v1.2.3 From 092063808c498eccac8e891973bf143e7b60d723 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 15 Jan 2014 13:49:51 +0000 Subject: ia64/efi: Implement efi_enabled() There's no good reason to keep efi_enabled() under CONFIG_X86 anymore, since nothing about the implementation is specific to x86. Set EFI feature flags in the ia64 boot path instead of claiming to support all features. The old behaviour was actually buggy since efi.memmap never points to a valid memory map, so we shouldn't be claiming to support EFI_MEMMAP. Fortunately, this bug was never triggered because EFI_MEMMAP isn't used outside of arch/x86 currently, but that may not always be the case. Reviewed-and-tested-by: Tony Luck Signed-off-by: Matt Fleming --- include/linux/efi.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 214833b4a97d..64d532ca890a 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -660,8 +660,6 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_ARCH_1 6 /* First arch-specific bit */ #ifdef CONFIG_EFI -# ifdef CONFIG_X86 - /* * Test whether the above EFI_* bits are enabled. */ @@ -669,12 +667,6 @@ static inline bool efi_enabled(int feature) { return test_bit(feature, &efi.flags) != 0; } -# else -static inline bool efi_enabled(int feature) -{ - return true; -} -# endif #else static inline bool efi_enabled(int feature) { -- cgit v1.2.3 From 792d0018a5fe31ef8ef9d07a7a02081d4abdf6b7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:14 +0000 Subject: genirq: Add a kstat helper to increment irq stats There is a common pattern all over the place: kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); This results in a call to core code anyway. So provide a function which does the same thing in core. While at it, replace the butt ugly macro with an inline. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20140223212737.422068876@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/kernel_stat.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 51c72be4a7c3..54ec7e0a7d72 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -54,11 +54,13 @@ extern unsigned long long nr_context_switches(void); #include extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); -#define kstat_incr_irqs_this_cpu(irqno, DESC) \ -do { \ - __this_cpu_inc(*(DESC)->kstat_irqs); \ - __this_cpu_inc(kstat.irqs_sum); \ -} while (0) +static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *desc) +{ + __this_cpu_inc(*desc->kstat_irqs); + __this_cpu_inc(kstat.irqs_sum); +} + +extern void kstat_incr_irq_this_cpu(unsigned int irq); static inline void kstat_incr_softirqs_this_cpu(unsigned int irq) { -- cgit v1.2.3 From 8f945a3325bbe0dd651e2f496a53df9b06fc6d07 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:23 +0000 Subject: genirq: Move kstat_incr_irqs_this_cpu() to core No more users outside the core code. Put it into the poison cabinet. That also gets rid of the linux/irq.h include in kernel_stat.h Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20140223212739.124207133@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/kernel_stat.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 54ec7e0a7d72..c3fbda7690d6 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -51,15 +51,7 @@ DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); extern unsigned long long nr_context_switches(void); -#include extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); - -static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *desc) -{ - __this_cpu_inc(*desc->kstat_irqs); - __this_cpu_inc(kstat.irqs_sum); -} - extern void kstat_incr_irq_this_cpu(unsigned int irq); static inline void kstat_incr_softirqs_this_cpu(unsigned int irq) -- cgit v1.2.3 From 677703cef0a148ba07d37ced649ad25b1cda2f78 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 10 Jan 2014 13:47:37 +0000 Subject: efi: Add separate 32-bit/64-bit definitions The traditional approach of using machine-specific types such as 'unsigned long' does not allow the kernel to interact with firmware running in a different CPU mode, e.g. 64-bit kernel with 32-bit EFI. Add distinct EFI structure definitions for both 32-bit and 64-bit so that we can use them in the 32-bit and 64-bit code paths. Acked-by: Borislav Petkov Signed-off-by: Matt Fleming --- include/linux/efi.h | 252 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 252 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 0a819e7a60c9..42c662729e13 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -153,6 +153,102 @@ typedef struct { u8 sets_to_zero; } efi_time_cap_t; +typedef struct { + efi_table_hdr_t hdr; + u32 raise_tpl; + u32 restore_tpl; + u32 allocate_pages; + u32 free_pages; + u32 get_memory_map; + u32 allocate_pool; + u32 free_pool; + u32 create_event; + u32 set_timer; + u32 wait_for_event; + u32 signal_event; + u32 close_event; + u32 check_event; + u32 install_protocol_interface; + u32 reinstall_protocol_interface; + u32 uninstall_protocol_interface; + u32 handle_protocol; + u32 __reserved; + u32 register_protocol_notify; + u32 locate_handle; + u32 locate_device_path; + u32 install_configuration_table; + u32 load_image; + u32 start_image; + u32 exit; + u32 unload_image; + u32 exit_boot_services; + u32 get_next_monotonic_count; + u32 stall; + u32 set_watchdog_timer; + u32 connect_controller; + u32 disconnect_controller; + u32 open_protocol; + u32 close_protocol; + u32 open_protocol_information; + u32 protocols_per_handle; + u32 locate_handle_buffer; + u32 locate_protocol; + u32 install_multiple_protocol_interfaces; + u32 uninstall_multiple_protocol_interfaces; + u32 calculate_crc32; + u32 copy_mem; + u32 set_mem; + u32 create_event_ex; +} __packed efi_boot_services_32_t; + +typedef struct { + efi_table_hdr_t hdr; + u64 raise_tpl; + u64 restore_tpl; + u64 allocate_pages; + u64 free_pages; + u64 get_memory_map; + u64 allocate_pool; + u64 free_pool; + u64 create_event; + u64 set_timer; + u64 wait_for_event; + u64 signal_event; + u64 close_event; + u64 check_event; + u64 install_protocol_interface; + u64 reinstall_protocol_interface; + u64 uninstall_protocol_interface; + u64 handle_protocol; + u64 __reserved; + u64 register_protocol_notify; + u64 locate_handle; + u64 locate_device_path; + u64 install_configuration_table; + u64 load_image; + u64 start_image; + u64 exit; + u64 unload_image; + u64 exit_boot_services; + u64 get_next_monotonic_count; + u64 stall; + u64 set_watchdog_timer; + u64 connect_controller; + u64 disconnect_controller; + u64 open_protocol; + u64 close_protocol; + u64 open_protocol_information; + u64 protocols_per_handle; + u64 locate_handle_buffer; + u64 locate_protocol; + u64 install_multiple_protocol_interfaces; + u64 uninstall_multiple_protocol_interfaces; + u64 calculate_crc32; + u64 copy_mem; + u64 set_mem; + u64 create_event_ex; +} __packed efi_boot_services_64_t; + /* * EFI Boot Services table */ @@ -231,12 +327,61 @@ typedef enum { EfiPciIoAttributeOperationMaximum } EFI_PCI_IO_PROTOCOL_ATTRIBUTE_OPERATION; +typedef struct { + u32 read; + u32 write; +} efi_pci_io_protocol_access_32_t; + +typedef struct { + u64 read; + u64 write; +} efi_pci_io_protocol_access_64_t; typedef struct { void *read; void *write; } efi_pci_io_protocol_access_t; +typedef struct { + u32 poll_mem; + u32 poll_io; + efi_pci_io_protocol_access_32_t mem; + efi_pci_io_protocol_access_32_t io; + efi_pci_io_protocol_access_32_t pci; + u32 copy_mem; + u32 map; + u32 unmap; + u32 allocate_buffer; + u32 free_buffer; + u32 flush; + u32 get_location; + u32 attributes; + u32 get_bar_attributes; + u32 set_bar_attributes; + uint64_t romsize; + void *romimage; +} efi_pci_io_protocol_32; + +typedef struct { + u64 poll_mem; + u64 poll_io; + efi_pci_io_protocol_access_64_t mem; + efi_pci_io_protocol_access_64_t io; + efi_pci_io_protocol_access_64_t pci; + u64 copy_mem; + u64 map; + u64 unmap; + u64 allocate_buffer; + u64 free_buffer; + u64 flush; + u64 get_location; + u64 attributes; + u64 get_bar_attributes; + u64 set_bar_attributes; + uint64_t romsize; + void *romimage; +} efi_pci_io_protocol_64; + typedef struct { void *poll_mem; void *poll_io; @@ -290,6 +435,42 @@ typedef struct { #define EFI_RUNTIME_SERVICES_SIGNATURE ((u64)0x5652453544e5552ULL) #define EFI_RUNTIME_SERVICES_REVISION 0x00010000 +typedef struct { + efi_table_hdr_t hdr; + u32 get_time; + u32 set_time; + u32 get_wakeup_time; + u32 set_wakeup_time; + u32 set_virtual_address_map; + u32 convert_pointer; + u32 get_variable; + u32 get_next_variable; + u32 set_variable; + u32 get_next_high_mono_count; + u32 reset_system; + u32 update_capsule; + u32 query_capsule_caps; + u32 query_variable_info; +} efi_runtime_services_32_t; + +typedef struct { + efi_table_hdr_t hdr; + u64 get_time; + u64 set_time; + u64 get_wakeup_time; + u64 set_wakeup_time; + u64 set_virtual_address_map; + u64 convert_pointer; + u64 get_variable; + u64 get_next_variable; + u64 set_variable; + u64 get_next_high_mono_count; + u64 reset_system; + u64 update_capsule; + u64 query_capsule_caps; + u64 query_variable_info; +} efi_runtime_services_64_t; + typedef struct { efi_table_hdr_t hdr; void *get_time; @@ -483,6 +664,38 @@ struct efi_memory_map { unsigned long desc_size; }; +typedef struct { + u32 revision; + u32 parent_handle; + u32 system_table; + u32 device_handle; + u32 file_path; + u32 reserved; + u32 load_options_size; + u32 load_options; + u32 image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + unsigned long unload; +} efi_loaded_image_32_t; + +typedef struct { + u32 revision; + u64 parent_handle; + u64 system_table; + u64 device_handle; + u64 file_path; + u64 reserved; + u32 load_options_size; + u64 load_options; + u64 image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + unsigned long unload; +} efi_loaded_image_64_t; + typedef struct { u32 revision; void *parent_handle; @@ -511,6 +724,34 @@ typedef struct { efi_char16_t filename[1]; } efi_file_info_t; +typedef struct { + u64 revision; + u32 open; + u32 close; + u32 delete; + u32 read; + u32 write; + u32 get_position; + u32 set_position; + u32 get_info; + u32 set_info; + u32 flush; +} efi_file_handle_32_t; + +typedef struct { + u64 revision; + u64 open; + u64 close; + u64 delete; + u64 read; + u64 write; + u64 get_position; + u64 set_position; + u64 get_info; + u64 set_info; + u64 flush; +} efi_file_handle_64_t; + typedef struct _efi_file_handle { u64 revision; efi_status_t (*open)(struct _efi_file_handle *, @@ -809,6 +1050,17 @@ struct efivar_entry { bool deleting; }; +struct efi_simple_text_output_protocol_32 { + u32 reset; + u32 output_string; + u32 test_string; +}; + +struct efi_simple_text_output_protocol_64 { + u64 reset; + u64 output_string; + u64 test_string; +}; struct efi_simple_text_output_protocol { void *reset; -- cgit v1.2.3 From 291fdb0bcebd5e8db6af767c1fdc522167dad73d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 12:39:03 +0100 Subject: ipc/compat_sys_msgrcv: change msgtyp type from long to compat_long_t Change the type of compat_sys_msgrcv's msgtyp parameter from long to compat_long_t, since compat user space passes only a 32 bit signed value. Let the compat wrapper do proper sign extension to 64 bit of this parameter. Signed-off-by: Heiko Carstens --- include/linux/compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 4c42df3fce37..179b1da9e19f 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -321,7 +321,7 @@ asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg); asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, int msgflg); asmlinkage long compat_sys_msgrcv(int msqid, compat_uptr_t msgp, - compat_ssize_t msgsz, long msgtyp, int msgflg); + compat_ssize_t msgsz, compat_long_t msgtyp, int msgflg); long compat_sys_msgctl(int first, int second, void __user *uptr); long compat_sys_shmctl(int first, int second, void __user *uptr); long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, -- cgit v1.2.3 From 378a10f3ae2e8a67ecc8f548c8e5ff25881bd88a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 5 Mar 2014 10:43:51 +0100 Subject: fs/compat: optional preadv64/pwrite64 compat system calls The preadv64/pwrite64 have been implemented for the x32 ABI, in order to allow passing 64 bit arguments from user space without splitting them into two 32 bit parameters, like it would be necessary for usual compat tasks. Howevert these two system calls are only being used for the x32 ABI, so add __ARCH_WANT_COMPAT defines for these two compat syscalls and make these two only visible for x86. Signed-off-by: Heiko Carstens --- include/linux/compat.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 179b1da9e19f..1c457428ec0a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -340,6 +340,19 @@ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, const struct compat_iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high); + +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 +asmlinkage long compat_sys_preadv64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos); +#endif + +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 +asmlinkage long compat_sys_pwritev64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos); +#endif + asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, -- cgit v1.2.3 From 932602e238329da99f8482c1b721549531fbfe7f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 16:07:52 +0100 Subject: fs/compat: convert to COMPAT_SYSCALL_DEFINE with changing parameter types Some fs compat system calls have unsigned long parameters instead of compat_ulong_t. In order to allow the COMPAT_SYSCALL_DEFINE macro generate code that performs proper zero and sign extension convert all 64 bit parameters their corresponding 32 bit counterparts. compat_sys_io_getevents() is a bit different: the non-compat version has signed parameters for the "min_nr" and "nr" parameters while the compat version has unsigned parameters. So change this as well. For all practical purposes this shouldn't make any difference (doesn't fix a real bug). Also introduce a generic compat_aio_context_t type which can be used everywhere. The access_ok() check within compat_sys_io_getevents() got also removed since the non-compat sys_io_getevents() should be able to handle everything anyway. Signed-off-by: Heiko Carstens --- include/linux/compat.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 1c457428ec0a..fea8ee9afe22 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -71,6 +71,8 @@ typedef struct compat_sigaltstack { typedef __compat_uid32_t compat_uid_t; typedef __compat_gid32_t compat_gid_t; +typedef compat_ulong_t compat_aio_context_t; + struct compat_sel_arg_struct; struct rusage; @@ -497,20 +499,20 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf); asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, - unsigned long arg); + compat_ulong_t arg); asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, - unsigned long arg); + compat_ulong_t arg); asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); -asmlinkage long compat_sys_io_getevents(aio_context_t ctx_id, - unsigned long min_nr, - unsigned long nr, +asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id, + compat_long_t min_nr, + compat_long_t nr, struct io_event __user *events, struct compat_timespec __user *timeout); -asmlinkage long compat_sys_io_submit(aio_context_t ctx_id, int nr, +asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr, u32 __user *iocb); asmlinkage long compat_sys_mount(const char __user *dev_name, const char __user *dir_name, - const char __user *type, unsigned long flags, + const char __user *type, compat_ulong_t flags, const void __user *data); asmlinkage long compat_sys_old_readdir(unsigned int fd, struct compat_old_linux_dirent __user *, @@ -633,7 +635,7 @@ asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig, struct compat_siginfo __user *uinfo); asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg); + compat_ulong_t arg); asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, struct compat_timespec __user *utime, u32 __user *uaddr2, u32 val3); -- cgit v1.2.3 From 8eee9093cdbeb2aa89d67dc1a3fd118acabaea52 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 16:19:16 +0100 Subject: ipc/compat: convert to COMPAT_SYSCALL_DEFINE with changing parameter types In order to allow the COMPAT_SYSCALL_DEFINE macro generate code that performs proper zero and sign extension convert all 64 bit parameters to their corresponding 32 bit compat counterparts. Signed-off-by: Heiko Carstens --- include/linux/compat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index fea8ee9afe22..f670e591aff8 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -655,11 +655,11 @@ asmlinkage long compat_sys_mq_open(const char __user *u_name, struct compat_mq_attr __user *u_attr); asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, - size_t msg_len, unsigned int msg_prio, + compat_size_t msg_len, unsigned int msg_prio, const struct compat_timespec __user *u_abs_timeout); asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, - size_t msg_len, unsigned int __user *u_msg_prio, + compat_size_t msg_len, unsigned int __user *u_msg_prio, const struct compat_timespec __user *u_abs_timeout); asmlinkage long compat_sys_socketcall(int call, u32 __user *args); asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args); -- cgit v1.2.3 From 3a49a0f7181c243aa04e6c5e44ca70a90ead8f9a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 17:09:57 +0100 Subject: net/compat: convert to COMPAT_SYSCALL_DEFINE with changing parameter types In order to allow the COMPAT_SYSCALL_DEFINE macro generate code that performs proper zero and sign extension convert all 64 bit parameters to their corresponding 32 bit compat counterparts. Signed-off-by: Heiko Carstens --- include/linux/compat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index f670e591aff8..8e636211f334 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -569,9 +569,9 @@ asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags); asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags); -asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, +asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len, unsigned flags); -asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, +asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, unsigned flags, struct sockaddr __user *addr, int __user *addrlen); asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, -- cgit v1.2.3 From ca2c405ab90591dcb1bc3765467cbdf2b99a0f6a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 17:13:42 +0100 Subject: kexec/compat: convert to COMPAT_SYSCALL_DEFINE with changing parameter types In order to allow the COMPAT_SYSCALL_DEFINE macro generate code that performs proper zero and sign extension convert all 64 bit parameters to their corresponding 32 bit compat counterparts. Signed-off-by: Heiko Carstens --- include/linux/compat.h | 6 +++--- include/linux/kexec.h | 6 ------ 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 8e636211f334..ef4834c5bcab 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -641,10 +641,10 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, u32 val3); asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen); -asmlinkage long compat_sys_kexec_load(unsigned long entry, - unsigned long nr_segments, +asmlinkage long compat_sys_kexec_load(compat_ulong_t entry, + compat_ulong_t nr_segments, struct compat_kexec_segment __user *, - unsigned long flags); + compat_ulong_t flags); asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, const struct compat_mq_attr __user *u_mqstat, struct compat_mq_attr __user *u_omqstat); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 6d4066cdb5b5..a75641930049 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -127,12 +127,6 @@ extern asmlinkage long sys_kexec_load(unsigned long entry, struct kexec_segment __user *segments, unsigned long flags); extern int kernel_kexec(void); -#ifdef CONFIG_COMPAT -extern asmlinkage long compat_sys_kexec_load(unsigned long entry, - unsigned long nr_segments, - struct compat_kexec_segment __user *segments, - unsigned long flags); -#endif extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); extern void crash_kexec(struct pt_regs *); -- cgit v1.2.3 From 2f2728f6de9837abe4b354443a45be578fbbf942 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 17:18:23 +0100 Subject: mm/compat: convert to COMPAT_SYSCALL_DEFINE with changing parameter types In order to allow the COMPAT_SYSCALL_DEFINE macro generate code that performs proper zero and sign extension convert all 64 bit parameters to their corresponding 32 bit compat counterparts. Signed-off-by: Heiko Carstens --- include/linux/compat.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index ef4834c5bcab..7c765624b7ef 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -469,7 +469,7 @@ asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, asmlinkage long compat_sys_timerfd_gettime(int ufd, struct compat_itimerspec __user *otmr); -asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, +asmlinkage long compat_sys_move_pages(pid_t pid, compat_ulong_t nr_pages, __u32 __user *pages, const int __user *nodes, int __user *status, @@ -674,12 +674,12 @@ extern void __user *compat_alloc_user_space(unsigned long len); asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, const struct compat_iovec __user *lvec, - unsigned long liovcnt, const struct compat_iovec __user *rvec, - unsigned long riovcnt, unsigned long flags); + compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, + compat_ulong_t riovcnt, compat_ulong_t flags); asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, const struct compat_iovec __user *lvec, - unsigned long liovcnt, const struct compat_iovec __user *rvec, - unsigned long riovcnt, unsigned long flags); + compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, + compat_ulong_t riovcnt, compat_ulong_t flags); asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, compat_size_t count); -- cgit v1.2.3 From 70044d71d31d6973665ced5be04ef39ac1c09a48 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:19:57 -0500 Subject: firewire: don't use PREPARE_DELAYED_WORK PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. firewire core-device and sbp2 have been been multiplexing work items with multiple work functions. Introduce fw_device_workfn() and sbp2_lu_workfn() which invoke fw_device->workfn and sbp2_logical_unit->workfn respectively and always use the two functions as the work functions and update the users to set the ->workfn fields instead of overriding work functions using PREPARE_DELAYED_WORK(). This fixes a variety of possible regressions since a2c1c57be8d9 "workqueue: consider work function when searching for busy work items" due to which fw_workqueue lost its required non-reentrancy property. Signed-off-by: Tejun Heo Acked-by: Stefan Richter Cc: linux1394-devel@lists.sourceforge.net Cc: stable@vger.kernel.org # v3.9+ Cc: stable@vger.kernel.org # v3.8.2+ Cc: stable@vger.kernel.org # v3.4.60+ Cc: stable@vger.kernel.org # v3.2.40+ --- include/linux/firewire.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 5d7782e42b8f..c3683bdf28fe 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -200,6 +200,7 @@ struct fw_device { unsigned irmc:1; unsigned bc_implemented:2; + work_func_t workfn; struct delayed_work work; struct fw_attribute_group attribute_group; }; -- cgit v1.2.3 From 9ca9737444f1a8602f74b85018d881e7e54b5bd1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:24:49 -0500 Subject: nvme: don't use PREPARE_WORK PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. nvme_dev->reset_work is multiplexed with multiple work functions. Introduce nvme_reset_workfn() which invokes nvme_dev->reset_workfn and always use it as the work function and update the users to set the ->reset_workfn field instead of overriding the work function using PREPARE_WORK(). It would probably be best to route this with other related updates through the workqueue tree. Compile tested. Signed-off-by: Tejun Heo Cc: Matthew Wilcox Cc: linux-nvme@lists.infradead.org --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 69ae03f6eb15..6b9aafed225f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -87,6 +87,7 @@ struct nvme_dev { struct list_head namespaces; struct kref kref; struct miscdevice miscdev; + work_func_t reset_workfn; struct work_struct reset_work; char name[12]; char serial[20]; -- cgit v1.2.3 From f073f9229ff1137d3be20558bec3bfb77e3af2a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:24:50 -0500 Subject: workqueue: remove PREPARE_[DELAYED_]WORK() Peter Hurley noticed that since a2c1c57be8d9 ("workqueue: consider work function when searching for busy work items"), a work item which gets assigned a different work function would break out of the non-reentrancy guarantee as workqueue would consider it a different work item. This is fragile and extremely subtle. PREPARE_[DELAYED_]WORK() have never been used widely and its semantics has always been somewhat iffy. If the work item is known not to be on queue when PREPARE_WORK() is called, there's no difference from using INIT_WORK(). If the work item may be queued at the time of PREPARE_WORK(), we can't really tell whether the old or new function will be executed the next time. We really don't want this level of subtlety in workqueue interface for such marginal use cases. The previous patches converted all existing users away from PREPARE_[DELAYED_]WORK(). Let's remove them. Signed-off-by: Tejun Heo Cc: Peter Hurley Link: http://lkml.kernel.org/g/1392493119-9277-1-git-send-email-peter@hurleysoftware.com --- include/linux/workqueue.h | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 8059334a6b02..29da9e77c3bb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -177,17 +177,6 @@ struct execute_work { #define DECLARE_DEFERRABLE_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, TIMER_DEFERRABLE) -/* - * initialize a work item's function pointer - */ -#define PREPARE_WORK(_work, _func) \ - do { \ - (_work)->func = (_func); \ - } while (0) - -#define PREPARE_DELAYED_WORK(_work, _func) \ - PREPARE_WORK(&(_work)->work, (_func)) - #ifdef CONFIG_DEBUG_OBJECTS_WORK extern void __init_work(struct work_struct *work, int onstack); extern void destroy_work_on_stack(struct work_struct *work); @@ -217,7 +206,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0); \ INIT_LIST_HEAD(&(_work)->entry); \ - PREPARE_WORK((_work), (_func)); \ + (_work)->func = (_func); \ } while (0) #else #define __INIT_WORK(_work, _func, _onstack) \ @@ -225,7 +214,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ - PREPARE_WORK((_work), (_func)); \ + (_work)->func = (_func); \ } while (0) #endif -- cgit v1.2.3 From 52a4c6404f91f2d2c5592ee6365a8418c4565f53 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 7 Mar 2014 12:44:19 +0100 Subject: selinux: add gfp argument to security_xfrm_policy_alloc and fix callers security_xfrm_policy_alloc can be called in atomic context so the allocation should be done with GFP_ATOMIC. Add an argument to let the callers choose the appropriate way. In order to do so a gfp argument needs to be added to the method xfrm_policy_alloc_security in struct security_operations and to the internal function selinux_xfrm_alloc_user. After that switch to GFP_ATOMIC in the atomic callers and leave GFP_KERNEL as before for the rest. The path that needed the gfp argument addition is: security_xfrm_policy_alloc -> security_ops.xfrm_policy_alloc_security -> all users of xfrm_policy_alloc_security (e.g. selinux_xfrm_policy_alloc) -> selinux_xfrm_alloc_user (here the allocation used to be GFP_KERNEL only) Now adding a gfp argument to selinux_xfrm_alloc_user requires us to also add it to security_context_to_sid which is used inside and prior to this patch did only GFP_KERNEL allocation. So add gfp argument to security_context_to_sid and adjust all of its callers as well. CC: Paul Moore CC: Dave Jones CC: Steffen Klassert CC: Fan Du CC: David S. Miller CC: LSM list CC: SELinux list Signed-off-by: Nikolay Aleksandrov Acked-by: Paul Moore Signed-off-by: Steffen Klassert --- include/linux/security.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 5623a7f965b7..2fc42d191f79 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1040,6 +1040,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Allocate a security structure to the xp->security field; the security * field is initialized to NULL when the xfrm_policy is allocated. * Return 0 if operation was successful (memory to allocate, legal context) + * @gfp is to specify the context for the allocation * @xfrm_policy_clone_security: * @old_ctx contains an existing xfrm_sec_ctx. * @new_ctxp contains a new xfrm_sec_ctx being cloned from old. @@ -1683,7 +1684,7 @@ struct security_operations { #ifdef CONFIG_SECURITY_NETWORK_XFRM int (*xfrm_policy_alloc_security) (struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx); + struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp); int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx); void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx); int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx); @@ -2859,7 +2860,8 @@ static inline void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) #ifdef CONFIG_SECURITY_NETWORK_XFRM -int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx); +int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp); int security_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp); void security_xfrm_policy_free(struct xfrm_sec_ctx *ctx); int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx); @@ -2877,7 +2879,9 @@ void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl); #else /* CONFIG_SECURITY_NETWORK_XFRM */ -static inline int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, + gfp_t gfp) { return 0; } -- cgit v1.2.3 From 9c225f2655e36a470c4f58dbbc99244c5fc7f2d4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Mar 2014 09:36:58 -0800 Subject: vfs: atomic f_pos accesses as per POSIX Our write() system call has always been atomic in the sense that you get the expected thread-safe contiguous write, but we haven't actually guaranteed that concurrent writes are serialized wrt f_pos accesses, so threads (or processes) that share a file descriptor and use "write()" concurrently would quite likely overwrite each others data. This violates POSIX.1-2008/SUSv4 Section XSI 2.9.7 that says: "2.9.7 Thread Interactions with Regular File Operations All of the following functions shall be atomic with respect to each other in the effects specified in POSIX.1-2008 when they operate on regular files or symbolic links: [...]" and one of the effects is the file position update. This unprotected file position behavior is not new behavior, and nobody has ever cared. Until now. Yongzhi Pan reported unexpected behavior to Michael Kerrisk that was due to this. This resolves the issue with a f_pos-specific lock that is taken by read/write/lseek on file descriptors that may be shared across threads or processes. Reported-by: Yongzhi Pan Reported-by: Michael Kerrisk Cc: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/file.h | 6 ++++-- include/linux/fs.h | 6 +++++- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index cbacf4faf447..f2517fa2d610 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -28,12 +28,14 @@ static inline void fput_light(struct file *file, int fput_needed) struct fd { struct file *file; - int need_put; + unsigned int flags; }; +#define FDPUT_FPUT 1 +#define FDPUT_POS_UNLOCK 2 static inline void fdput(struct fd fd) { - if (fd.need_put) + if (fd.flags & FDPUT_FPUT) fput(fd.file); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 60829565e552..ebfde04bca06 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -123,6 +123,9 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)0x4000) +/* File needs atomic accesses to f_pos */ +#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) @@ -780,13 +783,14 @@ struct file { const struct file_operations *f_op; /* - * Protects f_ep_links, f_flags, f_pos vs i_size in lseek SEEK_CUR. + * Protects f_ep_links, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock; atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; + struct mutex f_pos_lock; loff_t f_pos; struct fown_struct f_owner; const struct cred *f_cred; -- cgit v1.2.3 From bd2a31d522344b3ac2fb680bd2366e77a9bd8209 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 Mar 2014 14:54:22 -0500 Subject: get rid of fget_light() instead of returning the flags by reference, we can just have the low-level primitive return those in lower bits of unsigned long, with struct file * derived from the rest. Signed-off-by: Al Viro --- include/linux/file.h | 21 +++++++++++---------- include/linux/fs.h | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index f2517fa2d610..4d69123377a2 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -40,23 +40,24 @@ static inline void fdput(struct fd fd) } extern struct file *fget(unsigned int fd); -extern struct file *fget_light(unsigned int fd, int *fput_needed); +extern struct file *fget_raw(unsigned int fd); +extern unsigned long __fdget(unsigned int fd); +extern unsigned long __fdget_raw(unsigned int fd); +extern unsigned long __fdget_pos(unsigned int fd); -static inline struct fd fdget(unsigned int fd) +static inline struct fd __to_fd(unsigned long v) { - int b; - struct file *f = fget_light(fd, &b); - return (struct fd){f,b}; + return (struct fd){(struct file *)(v & ~3),v & 3}; } -extern struct file *fget_raw(unsigned int fd); -extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); +static inline struct fd fdget(unsigned int fd) +{ + return __to_fd(__fdget(fd)); +} static inline struct fd fdget_raw(unsigned int fd) { - int b; - struct file *f = fget_raw_light(fd, &b); - return (struct fd){f,b}; + return __to_fd(__fdget_raw(fd)); } extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); diff --git a/include/linux/fs.h b/include/linux/fs.h index ebfde04bca06..23b2a35d712e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -812,7 +812,7 @@ struct file { #ifdef CONFIG_DEBUG_WRITECOUNT unsigned long f_mnt_write_state; #endif -}; +} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { __u32 handle_bytes; -- cgit v1.2.3 From e97ca8e5b864f88b028c1759ba8536fa827d6d96 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 10 Mar 2014 15:49:43 -0700 Subject: mm: fix GFP_THISNODE callers and clarify GFP_THISNODE is for callers that implement their own clever fallback to remote nodes. It restricts the allocation to the specified node and does not invoke reclaim, assuming that the caller will take care of it when the fallback fails, e.g. through a subsequent allocation request without GFP_THISNODE set. However, many current GFP_THISNODE users only want the node exclusive aspect of the flag, without actually implementing their own fallback or triggering reclaim if necessary. This results in things like page migration failing prematurely even when there is easily reclaimable memory available, unless kswapd happens to be running already or a concurrent allocation attempt triggers the necessary reclaim. Convert all callsites that don't implement their own fallback strategy to __GFP_THISNODE. This restricts the allocation a single node too, but at the same time allows the allocator to enter the slowpath, wake kswapd, and invoke direct reclaim if necessary, to make the allocation happen when memory is full. Signed-off-by: Johannes Weiner Acked-by: Rik van Riel Cc: Jan Stancek Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 ++++ include/linux/mmzone.h | 4 ++-- include/linux/slab.h | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0437439bc047..39b81dc7d01a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -123,6 +123,10 @@ struct vm_area_struct; __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ __GFP_NO_KSWAPD) +/* + * GFP_THISNODE does not perform any reclaim, you most likely want to + * use __GFP_THISNODE to allocate from a given node without fallback! + */ #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) #else diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5f2052c83154..9b61b9bf81ac 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -590,10 +590,10 @@ static inline bool zone_is_empty(struct zone *zone) /* * The NUMA zonelists are doubled because we need zonelists that restrict the - * allocations to a single node for GFP_THISNODE. + * allocations to a single node for __GFP_THISNODE. * * [0] : Zonelist with fallback - * [1] : No fallback (GFP_THISNODE) + * [1] : No fallback (__GFP_THISNODE) */ #define MAX_ZONELISTS 2 diff --git a/include/linux/slab.h b/include/linux/slab.h index 9260abdd67df..b5b2df60299e 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -410,7 +410,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * * %GFP_NOWAIT - Allocation will not sleep. * - * %GFP_THISNODE - Allocate node-local memory only. + * %__GFP_THISNODE - Allocate node-local memory only. * * %GFP_DMA - Allocation suitable for DMA. * Should only be used for kmalloc() caches. Otherwise, use a -- cgit v1.2.3 From c9122da1e2d29bd6a1475a0d1ce2aa6ac6ea25fa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Feb 2014 13:32:16 +0100 Subject: locking: Move mcs_spinlock.h into kernel/locking/ The mcs_spinlock code is not meant (or suitable) as a generic locking primitive, therefore take it away from the normal includes and place it in kernel/locking/. This way the locking primitives implemented there can use it as part of their implementation but we do not risk it getting used inapropriately. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-byirmpamgr7h25m5kyavwpzx@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mcs_spinlock.h | 114 ------------------------------------------- 1 file changed, 114 deletions(-) delete mode 100644 include/linux/mcs_spinlock.h (limited to 'include/linux') diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h deleted file mode 100644 index f2a5c6360083..000000000000 --- a/include/linux/mcs_spinlock.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * MCS lock defines - * - * This file contains the main data structure and API definitions of MCS lock. - * - * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock - * with the desirable properties of being fair, and with each cpu trying - * to acquire the lock spinning on a local variable. - * It avoids expensive cache bouncings that common test-and-set spin-lock - * implementations incur. - */ -#ifndef __LINUX_MCS_SPINLOCK_H -#define __LINUX_MCS_SPINLOCK_H - -#include - -struct mcs_spinlock { - struct mcs_spinlock *next; - int locked; /* 1 if lock acquired */ -}; - -#ifndef arch_mcs_spin_lock_contended -/* - * Using smp_load_acquire() provides a memory barrier that ensures - * subsequent operations happen after the lock is acquired. - */ -#define arch_mcs_spin_lock_contended(l) \ -do { \ - while (!(smp_load_acquire(l))) \ - arch_mutex_cpu_relax(); \ -} while (0) -#endif - -#ifndef arch_mcs_spin_unlock_contended -/* - * smp_store_release() provides a memory barrier to ensure all - * operations in the critical section has been completed before - * unlocking. - */ -#define arch_mcs_spin_unlock_contended(l) \ - smp_store_release((l), 1) -#endif - -/* - * Note: the smp_load_acquire/smp_store_release pair is not - * sufficient to form a full memory barrier across - * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. - * For applications that need a full barrier across multiple cpus - * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be - * used after mcs_lock. - */ - -/* - * In order to acquire the lock, the caller should declare a local node and - * pass a reference of the node to this function in addition to the lock. - * If the lock has already been acquired, then this will proceed to spin - * on this node->locked until the previous lock holder sets the node->locked - * in mcs_spin_unlock(). - * - * We don't inline mcs_spin_lock() so that perf can correctly account for the - * time spent in this lock function. - */ -static inline -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *prev; - - /* Init node */ - node->locked = 0; - node->next = NULL; - - prev = xchg(lock, node); - if (likely(prev == NULL)) { - /* - * Lock acquired, don't need to set node->locked to 1. Threads - * only spin on its own node->locked value for lock acquisition. - * However, since this thread can immediately acquire the lock - * and does not proceed to spin on its own node->locked, this - * value won't be used. If a debug mode is needed to - * audit lock status, then set node->locked value here. - */ - return; - } - ACCESS_ONCE(prev->next) = node; - - /* Wait until the lock holder passes the lock down. */ - arch_mcs_spin_lock_contended(&node->locked); -} - -/* - * Releases the lock. The caller should pass in the corresponding node that - * was used to acquire the lock. - */ -static inline -void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *next = ACCESS_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (likely(cmpxchg(lock, node, NULL) == node)) - return; - /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); - } - - /* Pass lock to next waiter. */ - arch_mcs_spin_unlock_contended(&next->locked); -} - -#endif /* __LINUX_MCS_SPINLOCK_H */ -- cgit v1.2.3 From fb0527bd5ea99bfeb2dd91e3c1433ecf745d6b99 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 29 Jan 2014 12:51:42 +0100 Subject: locking/mutexes: Introduce cancelable MCS lock for adaptive spinning Since we want a task waiting for a mutex_lock() to go to sleep and reschedule on need_resched() we must be able to abort the mcs_spin_lock() around the adaptive spin. Therefore implement a cancelable mcs lock. Signed-off-by: Peter Zijlstra Cc: chegu_vinod@hp.com Cc: paulmck@linux.vnet.ibm.com Cc: Waiman.Long@hp.com Cc: torvalds@linux-foundation.org Cc: tglx@linutronix.de Cc: riel@redhat.com Cc: akpm@linux-foundation.org Cc: davidlohr@hp.com Cc: hpa@zytor.com Cc: andi@firstfloor.org Cc: aswin@hp.com Cc: scott.norton@hp.com Cc: Jason Low Link: http://lkml.kernel.org/n/tip-62hcl5wxydmjzd182zhvk89m@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index c482e1d2cc49..11692dea18aa 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -46,7 +46,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ -struct mcs_spinlock; +struct optimistic_spin_queue; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -56,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ + struct optimistic_spin_queue *osq; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; -- cgit v1.2.3 From c1bacbae8192dd2a9ebadd22d793b68054f6c6e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 8 Mar 2014 08:59:58 +0100 Subject: genirq: Provide irq_request/release_resources chip callbacks For certain irq types, e.g. gpios, it's necessary to request resources before starting up the irq. This might fail so we cannot use the irq_startup() callback because we might call the irq_set_type() callback before that which does not make sense when the resource is not available. Calling irq_startup() before irq_set_type() can lead to spurious interrupts which is not desired either. Signed-off-by: Thomas Gleixner Cc: Jean-Jacques Hiblot Cc: Grant Likely Cc: linux-arm-kernel@lists.infradead.org Reviewed-by: Linus Walleij Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1403080857160.18573@ionos.tec.linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 7dc10036eff5..e675971bdc3f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -303,6 +303,10 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_pm_shutdown: function called from core code on shutdown once per chip * @irq_calc_mask: Optional function to set irq_data.mask for special cases * @irq_print_chip: optional to print special chip info in show_interrupts + * @irq_request_resources: optional to request resources before calling + * any other callback related to this irq + * @irq_release_resources: optional to release resources acquired with + * irq_request_resources * @flags: chip specific flags */ struct irq_chip { @@ -336,6 +340,8 @@ struct irq_chip { void (*irq_calc_mask)(struct irq_data *data); void (*irq_print_chip)(struct irq_data *data, struct seq_file *p); + int (*irq_request_resources)(struct irq_data *data); + void (*irq_release_resources)(struct irq_data *data); unsigned long flags; }; -- cgit v1.2.3 From bfc3f0281e08066fa8111c3972cff6edc1049864 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 16:33:42 +0100 Subject: cputime: Default implementation of nsecs -> cputime conversion The architectures that override cputime_t (s390, ppc) don't provide any version of nsecs_to_cputime(). Indeed this cputime_t implementation by backend only happens when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y under which the core code doesn't make any use of nsecs_to_cputime(). At least for now. We are going to make a broader use of it so lets provide a default version with a per usecs granularity. It should be good enough for most usecases. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker --- include/linux/cputime.h | 11 +++++++++++ include/linux/kernel_stat.h | 2 +- include/linux/sched.h | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 include/linux/cputime.h (limited to 'include/linux') diff --git a/include/linux/cputime.h b/include/linux/cputime.h new file mode 100644 index 000000000000..2842ebe2844d --- /dev/null +++ b/include/linux/cputime.h @@ -0,0 +1,11 @@ +#ifndef __LINUX_CPUTIME_H +#define __LINUX_CPUTIME_H + +#include + +#ifndef nsecs_to_cputime +# define nsecs_to_cputime(__nsecs) \ + usecs_to_cputime((__nsecs) / NSEC_PER_USEC) +#endif + +#endif /* __LINUX_CPUTIME_H */ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 51c72be4a7c3..d7c61317db86 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include /* * 'kernel_stat.h' contains the definitions needed for doing diff --git a/include/linux/sched.h b/include/linux/sched.h index 68a0e84463a0..1ac566c48d3d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -27,7 +27,7 @@ struct sched_param { #include #include -#include +#include #include #include -- cgit v1.2.3 From d8a9ce3f8ad2b546b9ebaf65de809da0793f11c5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 16:22:37 +0100 Subject: cputime: Bring cputime -> nsecs conversion We already have nsecs_to_cputime(). Now we need to be able to convert the other way around in order to fix a bug on steal time accounting. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker --- include/linux/cputime.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cputime.h b/include/linux/cputime.h index 2842ebe2844d..f2eb2ee535ca 100644 --- a/include/linux/cputime.h +++ b/include/linux/cputime.h @@ -3,6 +3,11 @@ #include +#ifndef cputime_to_nsecs +# define cputime_to_nsecs(__ct) \ + (cputime_to_usecs(__ct) * NSEC_PER_USEC) +#endif + #ifndef nsecs_to_cputime # define nsecs_to_cputime(__nsecs) \ usecs_to_cputime((__nsecs) / NSEC_PER_USEC) -- cgit v1.2.3 From 4f6e4f71c9d39cf49e0cb1be5b7721db5fbe92ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2014 15:32:47 +0100 Subject: genirq: Document IRQCHIP_ONESHOT_SAFE flag Add missing documentation of the flag. Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index e675971bdc3f..67ace7aa7947 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -355,6 +355,7 @@ struct irq_chip { * IRQCHIP_ONOFFLINE_ENABLED: Only call irq_on/off_line callbacks * when irq enabled * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip + * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), -- cgit v1.2.3 From 328a4978df833249b099c9875738d7b72042ffe1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2014 19:03:51 +0100 Subject: genirq: Add a new IRQCHIP_EOI_THREADED flag The flag is necessary for interrupt chips which require an ACK/EOI after the handler has run. In case of threaded handlers this needs to happen after the threaded handler has completed before the unmask of the interrupt. The flag is only unseful in combination with the handle_fasteoi_irq flow control handler. It can be combined with the flag IRQCHIP_EOI_IF_HANDLED, so the EOI is not issued when the interrupt is disabled or in progress. Tested-by: Hans de Goede Reviewed-by: Hans de Goede Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Cc: Maxime Ripard Link: http://lkml.kernel.org/r/1394733834-26839-2-git-send-email-hdegoede@redhat.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 67ace7aa7947..d278838908cb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -356,6 +356,7 @@ struct irq_chip { * when irq enabled * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask + * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -364,6 +365,7 @@ enum { IRQCHIP_ONOFFLINE_ENABLED = (1 << 3), IRQCHIP_SKIP_SET_WAKE = (1 << 4), IRQCHIP_ONESHOT_SAFE = (1 << 5), + IRQCHIP_EOI_THREADED = (1 << 6), }; /* This include will go away once we isolated irq_desc usage to core code */ -- cgit v1.2.3 From ff0992e9036e9810e7cd45234fa32ca1e79750e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 17 Mar 2014 16:25:18 +0100 Subject: net: cdc_ncm: fix control message ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a context modified revert of commit 6a9612e2cb22 ("net: cdc_ncm: remove ncm_parm field") which introduced a NCM specification violation, causing setup errors for some devices. These errors resulted in the device and host disagreeing about shared settings, with complete failure to communicate as the end result. The NCM specification require that many of the NCM specific control reuests are sent only while the NCM Data Interface is in alternate setting 0. Reverting the commit ensures that we follow this requirement. Fixes: 6a9612e2cb22 ("net: cdc_ncm: remove ncm_parm field") Reported-and-tested-by: Pasi Kärkkäinen Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- include/linux/usb/cdc_ncm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index c3fa80745996..2c14d9cdd57a 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -88,6 +88,7 @@ #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) struct cdc_ncm_ctx { + struct usb_cdc_ncm_ntb_parameters ncm_parm; struct hrtimer tx_timer; struct tasklet_struct bh; -- cgit v1.2.3 From bc6e7c4b0d1a1f742d96556f63d68f17f4e232c3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 14 Mar 2014 13:52:48 -0700 Subject: libata, libsas: kill pm_result and related cleanup Tejun says: "At least for libata, worrying about suspend/resume failures don't make whole lot of sense. If suspend failed, just proceed with suspend. If the device can't be woken up afterwards, that's that. There isn't anything we could have done differently anyway. The same for resume, if spinup fails, the device is dud and the following commands will invoke EH actions and will eventually fail. Again, there really isn't any *choice* to make. Just making sure the errors are handled gracefully (ie. don't crash) and the following commands are handled correctly should be enough." The only libata user that actually cares about the result from a suspend operation is libsas. However, it only cares about whether queuing a new operation collides with an in-flight one. All libsas does with the error is retry, but we can just let libata wait for the previous operation before continuing. Other cleanups include: 1/ Unifying all ata port pm operations on an ata_port_pm_ prefix 2/ Marking all ata port pm helper routines as returning void, only ata_port_pm_ entry points need to fake a 0 return value. 3/ Killing ata_port_{suspend|resume}_common() in favor of calling ata_port_request_pm() directly 4/ Killing the wrappers that just do a to_ata_port() conversion 5/ Clearly marking the entry points that do async operations with an _async suffix. Reference: http://marc.info/?l=linux-scsi&m=138995409532286&w=2 Cc: Phillip Susi Cc: Alan Stern Suggested-by: Tejun Heo Signed-off-by: Todd Brandt Signed-off-by: Dan Williams Signed-off-by: Tejun Heo --- include/linux/libata.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index bec6dbe939a0..5c09e86982c9 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -848,7 +848,6 @@ struct ata_port { struct completion park_req_pending; pm_message_t pm_mesg; - int *pm_result; enum ata_lpm_policy target_lpm_policy; struct timer_list fastdrain_timer; @@ -1140,16 +1139,14 @@ extern bool ata_link_offline(struct ata_link *link); #ifdef CONFIG_PM extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); -extern int ata_sas_port_async_suspend(struct ata_port *ap, int *async); -extern int ata_sas_port_async_resume(struct ata_port *ap, int *async); +extern void ata_sas_port_suspend(struct ata_port *ap); +extern void ata_sas_port_resume(struct ata_port *ap); #else -static inline int ata_sas_port_async_suspend(struct ata_port *ap, int *async) +static inline void ata_sas_port_suspend(struct ata_port *ap) { - return 0; } -static inline int ata_sas_port_async_resume(struct ata_port *ap, int *async) +static inline void ata_sas_port_async_resume(struct ata_port *ap) { - return 0; } #endif extern int ata_ratelimit(void); -- cgit v1.2.3 From a5a6569959fc55d4ebf1526f7855003596946c32 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 19 Mar 2014 10:46:25 -0700 Subject: libata.h: add stub for ata_sas_port_resume Fix build error when CONFIG_PM is not enabled by adding a stub function in . drivers/scsi/libsas/sas_ata.c: In function 'sas_resume_sata': drivers/scsi/libsas/sas_ata.c:756:3: error: implicit declaration of function 'ata_sas_port_resume' [-Werror=implicit-function-declaration] Signed-off-by: Randy Dunlap Reported-by: Jim Davis Signed-off-by: Tejun Heo Cc: Dan Williams --- include/linux/libata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 5c09e86982c9..52723789b991 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1148,6 +1148,9 @@ static inline void ata_sas_port_suspend(struct ata_port *ap) static inline void ata_sas_port_async_resume(struct ata_port *ap) { } +static inline void ata_sas_port_resume(struct ata_port *ap) +{ +} #endif extern int ata_ratelimit(void); extern void ata_msleep(struct ata_port *ap, unsigned int msecs); -- cgit v1.2.3 From 0dd5d6f0e8763ff09939adf3e5b1465a3a414fea Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 19 Mar 2014 11:14:15 -0700 Subject: libata: remove unused ata_sas_port_async_resume() stub Commit bc6e7c4b0d1a "libata, libsas: kill pm_result and related cleanup" renamed ata_sas_port_async_resume() to ata_sas_port_resume(), but missed a CONFIG_PM=n stub conversion. Randy fixed that up in commit a5a6569959fc "libata.h: add stub for ata_sas_port_resume", but missed the deletion of the now unused ata_sas_port_async_resume() routine. Cc: Randy Dunlap Signed-off-by: Dan Williams Signed-off-by: Tejun Heo --- include/linux/libata.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 52723789b991..1de36be64df4 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1145,9 +1145,6 @@ extern void ata_sas_port_resume(struct ata_port *ap); static inline void ata_sas_port_suspend(struct ata_port *ap) { } -static inline void ata_sas_port_async_resume(struct ata_port *ap) -{ -} static inline void ata_sas_port_resume(struct ata_port *ap) { } -- cgit v1.2.3 From e2e680fb7566880f7210cadf628c092c0433971c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 19 Mar 2014 16:21:33 +0530 Subject: hrtimer: Rearrange comments in the order struct members are declared Rearrange kernel doc comments in the order members of struct hrtimer are declared. Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Cc: fweisbec@gmail.com Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/1db1a3cfbe8a9ea49396af75c6ac04a2e67e3ab0.1395226248.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index d19a5c2d2270..e7a8d3fa91d5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -96,12 +96,12 @@ enum hrtimer_restart { * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) + * @start_pid: timer statistics field to store the pid of the task which + * started the timer * @start_site: timer statistics field to store the site where the timer * was started * @start_comm: timer statistics field to store the name of the process which * started the timer - * @start_pid: timer statistics field to store the pid of the task which - * started the timer * * The hrtimer structure must be initialized by hrtimer_init() */ -- cgit v1.2.3 From 6201b4d61fbf194df6371fb3376c5026cb8f5eec Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 18 Mar 2014 16:26:07 +0530 Subject: timer: Remove code redundancy while calling get_nohz_timer_target() There are only two users of get_nohz_timer_target(): timer and hrtimer. Both call it under same circumstances, i.e. #ifdef CONFIG_NO_HZ_COMMON if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) return get_nohz_timer_target(); #endif So, it makes more sense to get all this as part of get_nohz_timer_target() instead of duplicating code at two places. For this another parameter is required to be passed to this routine, pinned. Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Cc: fweisbec@gmail.com Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/1e1b53537217d58d48c2d7a222a9c3ac47d5b64c.1395140107.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 68a0e84463a0..6f6c56f63c68 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -291,10 +291,14 @@ extern int runqueue_is_locked(int cpu); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); -extern int get_nohz_timer_target(void); +extern int get_nohz_timer_target(int pinned); #else static inline void nohz_balance_enter_idle(int cpu) { } static inline void set_cpu_sd_state_idle(void) { } +static inline int get_nohz_timer_target(int pinned) +{ + return smp_processor_id(); +} #endif /* -- cgit v1.2.3 From f5b972e9fbd2e99a2abc3221783d089799b69394 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 20 Mar 2014 15:30:14 +0100 Subject: compat: include linux/unistd.h within linux/compat.h linux/compat.h does not include linux/unistd.h but the compat.h header file contains various conditional #ifdef __ARCH_WANT_COMPAT_... asmlinkage long compat...() #endif compat system call function declarations. If linux/unistd.h isn't included it depends on previous includes if those __ARCH_WANT_COMPAT_... defines are defined or not. So add an additional linux/unistd.h include. Should fix this compile error on tile: include/uapi/asm-generic/unistd.h:195:1: error: 'compat_sys_getdents64' undeclared make[3]: *** [arch/tile/kernel/compat.o] Error 1 Reported-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Acked-by: Chris Metcalf Signed-off-by: Heiko Carstens --- include/linux/compat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 7c765624b7ef..01c0aa57ccec 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -14,6 +14,7 @@ #include #include #include /* for aio_context_t */ +#include #include #include -- cgit v1.2.3 From 87291347c49dc40aa339f587b209618201c2e527 Mon Sep 17 00:00:00 2001 From: Vaibhav Nagarnaik Date: Thu, 13 Feb 2014 19:51:48 -0800 Subject: tracing: Fix array size mismatch in format string In event format strings, the array size is reported in two locations. One in array subscript and then via the "size:" attribute. The values reported there have a mismatch. For e.g., in sched:sched_switch the prev_comm and next_comm character arrays have subscript values as [32] where as the actual field size is 16. name: sched_switch ID: 301 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1;signed:0; field:int common_pid; offset:4; size:4; signed:1; field:char prev_comm[32]; offset:8; size:16; signed:1; field:pid_t prev_pid; offset:24; size:4; signed:1; field:int prev_prio; offset:28; size:4; signed:1; field:long prev_state; offset:32; size:8; signed:1; field:char next_comm[32]; offset:40; size:16; signed:1; field:pid_t next_pid; offset:56; size:4; signed:1; field:int next_prio; offset:60; size:4; signed:1; After bisection, the following commit was blamed: 92edca0 tracing: Use direct field, type and system names This commit removes the duplication of strings for field->name and field->type assuming that all the strings passed in __trace_define_field() are immutable. This is not true for arrays, where the type string is created in event_storage variable and field->type for all array fields points to event_storage. Use __stringify() to create a string constant for the type string. Also, get rid of event_storage and event_storage_mutex that are not needed anymore. also, an added benefit is that this reduces the overhead of events a bit more: text data bss dec hex filename 8424787 2036472 1302528 11763787 b3804b vmlinux 8420814 2036408 1302528 11759750 b37086 vmlinux.patched Link: http://lkml.kernel.org/r/1392349908-29685-1-git-send-email-vnagarnaik@google.com Cc: Laurent Chavey Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Vaibhav Nagarnaik Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4e4cc28623ad..4cdb3a17bcb5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -495,10 +495,6 @@ enum { FILTER_TRACE_FN, }; -#define EVENT_STORAGE_SIZE 128 -extern struct mutex event_storage_mutex; -extern char event_storage[EVENT_STORAGE_SIZE]; - extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, -- cgit v1.2.3 From 8c90487cdc64847b4fdd812ab3047f426fec4d13 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 26 Feb 2014 10:49:49 -0500 Subject: Rename TAINT_UNSAFE_SMP to TAINT_CPU_OUT_OF_SPEC Rename TAINT_UNSAFE_SMP to TAINT_CPU_OUT_OF_SPEC, so we can repurpose the flag to encompass a wider range of pushing the CPU beyond its warrany. Signed-off-by: Dave Jones Link: http://lkml.kernel.org/r/20140226154949.GA770@redhat.com Signed-off-by: H. Peter Anvin --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 196d1ea86df0..08fb02477641 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -458,7 +458,7 @@ extern enum system_states { #define TAINT_PROPRIETARY_MODULE 0 #define TAINT_FORCED_MODULE 1 -#define TAINT_UNSAFE_SMP 2 +#define TAINT_CPU_OUT_OF_SPEC 2 #define TAINT_FORCED_RMMOD 3 #define TAINT_MACHINE_CHECK 4 #define TAINT_BAD_PAGE 5 -- cgit v1.2.3 From 765a3f4fed708ae429ee095914a7897acb3a65bd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 14 Mar 2014 16:37:08 -0700 Subject: rcu: Provide grace-period piggybacking API The following pattern is currently not well supported by RCU: 1. Make data element inaccessible to RCU readers. 2. Do work that probably lasts for more than one grace period. 3. Do something to make sure RCU readers in flight before #1 above have completed. Here are some things that could currently be done: a. Do a synchronize_rcu() unconditionally at either #1 or #3 above. This works, but imposes needless work and latency. b. Post an RCU callback at #1 above that does a wakeup, then wait for the wakeup at #3. This works well, but likely results in an extra unneeded grace period. Open-coding this is also a bit more semi-tricky code than would be good. This commit therefore adds get_state_synchronize_rcu() and cond_synchronize_rcu() APIs. Call get_state_synchronize_rcu() at #1 above and pass its return value to cond_synchronize_rcu() at #3 above. This results in a call to synchronize_rcu() if no grace period has elapsed between #1 and #3, but requires only a load, comparison, and memory barrier if a full grace period did elapse. Requested-by: Peter Zijlstra Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra --- include/linux/rcutiny.h | 10 ++++++++++ include/linux/rcutree.h | 2 ++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e8cb6e3b52a7..425c659d54e5 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,6 +27,16 @@ #include +static inline unsigned long get_state_synchronize_rcu(void) +{ + return 0; +} + +static inline void cond_synchronize_rcu(unsigned long oldstate) +{ + might_sleep(); +} + static inline void rcu_barrier_bh(void) { wait_rcu_gp(call_rcu_bh); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index e9c63884df0a..a59ca05fd4e3 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -76,6 +76,8 @@ static inline void synchronize_rcu_bh_expedited(void) void rcu_barrier(void); void rcu_barrier_bh(void); void rcu_barrier_sched(void); +unsigned long get_state_synchronize_rcu(void); +void cond_synchronize_rcu(unsigned long oldstate); extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; -- cgit v1.2.3 From 7e09e738afd21ef99f047425fc0b0c9be8b03254 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 20 Mar 2014 21:52:17 -0700 Subject: mm: fix swapops.h:131 bug if remap_file_pages raced migration Add remove_linear_migration_ptes_from_nonlinear(), to fix an interesting little include/linux/swapops.h:131 BUG_ON(!PageLocked) found by trinity: indicating that remove_migration_ptes() failed to find one of the migration entries that was temporarily inserted. The problem comes from remap_file_pages()'s switch from vma_interval_tree (good for inserting the migration entry) to i_mmap_nonlinear list (no good for locating it again); but can only be a problem if the remap_file_pages() range does not cover the whole of the vma (zap_pte() clears the range). remove_migration_ptes() needs a file_nonlinear method to go down the i_mmap_nonlinear list, applying linear location to look for migration entries in those vmas too, just in case there was this race. The file_nonlinear method does need rmap_walk_control.arg to do this; but it never needed vma passed in - vma comes from its own iteration. Reported-and-tested-by: Dave Jones Reported-and-tested-by: Sasha Levin Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 1da693d51255..b66c2110cb1f 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -250,8 +250,7 @@ struct rmap_walk_control { int (*rmap_one)(struct page *page, struct vm_area_struct *vma, unsigned long addr, void *arg); int (*done)(struct page *page); - int (*file_nonlinear)(struct page *, struct address_space *, - struct vm_area_struct *vma); + int (*file_nonlinear)(struct page *, struct address_space *, void *arg); struct anon_vma *(*anon_lock)(struct page *page); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; -- cgit v1.2.3 From 41f50094b2ab4e37673e41a084ea61b907447159 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 24 Mar 2014 21:37:02 +0100 Subject: workqueue: Spelling s/instensive/intensive/ Signed-off-by: Geert Uytterhoeven Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 29da9e77c3bb..0523eab05f63 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -294,7 +294,7 @@ enum { WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ - WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ + WQ_CPU_INTENSIVE = 1 << 5, /* cpu intensive workqueue */ WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */ /* -- cgit v1.2.3 From ea2e64f280d2a34a8ed9ae3d783cd770d14b70ec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Mar 2014 14:20:44 +0000 Subject: workqueue: Provide destroy_delayed_work_on_stack() If a delayed or deferrable work is on stack we need to tell debug objects that we are destroying the timer and the work. Otherwise we leak the tracking object. Signed-off-by: Thomas Gleixner Cc: Vince Weaver Acked-by: Tejun Heo Link: http://lkml.kernel.org/r/20140323141939.911487677@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/workqueue.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 594521ba0d43..abdbe5af119d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -191,6 +191,7 @@ struct execute_work { #ifdef CONFIG_DEBUG_OBJECTS_WORK extern void __init_work(struct work_struct *work, int onstack); extern void destroy_work_on_stack(struct work_struct *work); +extern void destroy_delayed_work_on_stack(struct delayed_work *work); static inline unsigned int work_static(struct work_struct *work) { return *work_data_bits(work) & WORK_STRUCT_STATIC; @@ -198,6 +199,7 @@ static inline unsigned int work_static(struct work_struct *work) #else static inline void __init_work(struct work_struct *work, int onstack) { } static inline void destroy_work_on_stack(struct work_struct *work) { } +static inline void destroy_delayed_work_on_stack(struct delayed_work *work) { } static inline unsigned int work_static(struct work_struct *work) { return 0; } #endif -- cgit v1.2.3 From 14a0d635d18d0fb552dcc979d6d25106e6541f2e Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 26 Mar 2014 14:32:51 +0100 Subject: usbnet: include wait queue head in device structure This fixes a race which happens by freeing an object on the stack. Quoting Julius: > The issue is > that it calls usbnet_terminate_urbs() before that, which temporarily > installs a waitqueue in dev->wait in order to be able to wait on the > tasklet to run and finish up some queues. The waiting itself looks > okay, but the access to 'dev->wait' is totally unprotected and can > race arbitrarily. I think in this case usbnet_bh() managed to succeed > it's dev->wait check just before usbnet_terminate_urbs() sets it back > to NULL. The latter then finishes and the waitqueue_t structure on its > stack gets overwritten by other functions halfway through the > wake_up() call in usbnet_bh(). The fix is to just not allocate the data structure on the stack. As dev->wait is abused as a flag it also takes a runtime PM change to fix this bug. Signed-off-by: Oliver Neukum Reported-by: Grant Grundler Tested-by: Grant Grundler Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index e303eef94dd5..0662e98fef72 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -30,7 +30,7 @@ struct usbnet { struct driver_info *driver_info; const char *driver_name; void *driver_priv; - wait_queue_head_t *wait; + wait_queue_head_t wait; struct mutex phy_mutex; unsigned char suspend_count; unsigned char pkt_cnt, pkt_err; -- cgit v1.2.3 From 36d5fe6a000790f56039afe26834265db0a3ad4c Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Wed, 26 Mar 2014 22:37:45 +0000 Subject: core, nfqueue, openvswitch: Orphan frags in skb_zerocopy and handle errors skb_zerocopy can copy elements of the frags array between skbs, but it doesn't orphan them. Also, it doesn't handle errors, so this patch takes care of that as well, and modify the callers accordingly. skb_tx_error() is also added to the callers so they will signal the failed delivery towards the creator of the skb. Signed-off-by: Zoltan Kiss Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5e1e6f2d98c2..15ede6a823a6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2451,8 +2451,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, unsigned int flags); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); -void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, - int len, int hlen); +int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, + int len, int hlen); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); -- cgit v1.2.3 From 53d6471cef17262d3ad1c7ce8982a234244f68ec Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 17:26:18 -0400 Subject: net: Account for all vlan headers in skb_mac_gso_segment skb_network_protocol() already accounts for multiple vlan headers that may be present in the skb. However, skb_mac_gso_segment() doesn't know anything about it and assumes that skb->mac_len is set correctly to skip all mac headers. That may not always be the case. If we are simply forwarding the packet (via bridge or macvtap), all vlan headers may not be accounted for. A simple solution is to allow skb_network_protocol to return the vlan depth it has calculated. This way skb_mac_gso_segment will correctly skip all mac headers. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e8eeebd49a98..daafd9561cbc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3014,7 +3014,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { return __skb_gso_segment(skb, features, true); } -__be16 skb_network_protocol(struct sk_buff *skb); +__be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, __be16 protocol) -- cgit v1.2.3 From 2adb956b084d6d49f519541a4b5f9947e96f8ef7 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 22:14:49 -0400 Subject: vlan: Warn the user if lowerdev has bad vlan features. Some drivers incorrectly assign vlan acceleration features to vlan_features thus causing issues for Q-in-Q vlan configurations. Warn the user of such cases. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 1005ebf17575..5a09a48f2658 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -163,4 +163,11 @@ enum { /* changeable features with no special hardware requirements */ #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) +#define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ + NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX | \ + NETIF_F_HW_VLAN_STAG_FILTER | \ + NETIF_F_HW_VLAN_STAG_RX | \ + NETIF_F_HW_VLAN_STAG_TX) + #endif /* _LINUX_NETDEV_FEATURES_H */ -- cgit v1.2.3 From 59ff3eb6d6f75c6c1c3ea8b46ac2cc64eb216547 Mon Sep 17 00:00:00 2001 From: ZhangZhen Date: Thu, 27 Mar 2014 09:41:47 +0800 Subject: workqueue: remove deprecated WQ_NON_REENTRANT Tejun Heo has made WQ_NON_REENTRANT useless in the dbf2576e37 ("workqueue: make all workqueues non-reentrant"). So remove its usages and definition. This patch doesn't introduce any behavior changes. tj: minor description updates. Signed-off-by: ZhangZhen Sigend-off-by: Tejun Heo Acked-by: James Chapman Acked-by: Ulf Hansson --- include/linux/workqueue.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0523eab05f63..532994651684 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -284,12 +284,6 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * Documentation/workqueue.txt. */ enum { - /* - * All wqs are now non-reentrant making the following flag - * meaningless. Will be removed. - */ - WQ_NON_REENTRANT = 1 << 0, /* DEPRECATED */ - WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ -- cgit v1.2.3 From 00a1a053ebe5febcfc2ec498bd894f035ad2aa06 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 30 Mar 2014 10:20:01 -0400 Subject: ext4: atomically set inode->i_flags in ext4_set_inode_flags() Use cmpxchg() to atomically set i_flags instead of clearing out the S_IMMUTABLE, S_APPEND, etc. flags and then setting them from the EXT4_IMMUTABLE_FL, EXT4_APPEND_FL flags, since this opens up a race where an immutable file has the immutable flag cleared for a brief window of time. Reported-by: John Sullivan Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index abc9ca778456..be5fd38bd5a0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -196,6 +196,21 @@ static inline unsigned long __ffs64(u64 word) #ifdef __KERNEL__ +#ifndef set_mask_bits +#define set_mask_bits(ptr, _mask, _bits) \ +({ \ + const typeof(*ptr) mask = (_mask), bits = (_bits); \ + typeof(*ptr) old, new; \ + \ + do { \ + old = ACCESS_ONCE(*ptr); \ + new = (old & ~mask) | bits; \ + } while (cmpxchg(ptr, old, new) != old); \ + \ + new; \ +}) +#endif + #ifndef find_last_bit /** * find_last_bit - find the last set bit in a memory region -- cgit v1.2.3