From 44318d5b07be7d7cfe718aa22ea3b2577361a0b5 Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Tue, 2 Oct 2018 16:19:16 -0400
Subject: locking/lockdep: Remove add_chain_cache_classes()

The inline function add_chain_cache_classes() is defined, but has no
caller. Just remove it.

Signed-off-by: Waiman Long
Acked-by: Peter Zijlstra
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Will Deacon
Link: http://lkml.kernel.org/r/1538511560-10090-2-git-send-email-longman@redhat.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/lockdep.c | 70 ------------------------------------------------
 1 file changed, 70 deletions(-)

(limited to 'kernel/locking/lockdep.c')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index e406c5fdb41e..fa82d55279fe 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2148,76 +2148,6 @@ static int check_no_collision(struct task_struct *curr,
 	return 1;
 }
 
-/*
- * This is for building a chain between just two different classes,
- * instead of adding a new hlock upon current, which is done by
- * add_chain_cache().
- *
- * This can be called in any context with two classes, while
- * add_chain_cache() must be done within the lock owener's context
- * since it uses hlock which might be racy in another context.
- */
-static inline int add_chain_cache_classes(unsigned int prev,
-					  unsigned int next,
-					  unsigned int irq_context,
-					  u64 chain_key)
-{
-	struct hlist_head *hash_head = chainhashentry(chain_key);
-	struct lock_chain *chain;
-
-	/*
-	 * Allocate a new chain entry from the static array, and add
-	 * it to the hash:
-	 */
-
-	/*
-	 * We might need to take the graph lock, ensure we've got IRQs
-	 * disabled to make this an IRQ-safe lock.. for recursion reasons
-	 * lockdep won't complain about its own locking errors.
-	 */
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
-		return 0;
-
-	if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
-		if (!debug_locks_off_graph_unlock())
-			return 0;
-
-		print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
-		dump_stack();
-		return 0;
-	}
-
-	chain = lock_chains + nr_lock_chains++;
-	chain->chain_key = chain_key;
-	chain->irq_context = irq_context;
-	chain->depth = 2;
-	if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
-		chain->base = nr_chain_hlocks;
-		nr_chain_hlocks += chain->depth;
-		chain_hlocks[chain->base] = prev - 1;
-		chain_hlocks[chain->base + 1] = next -1;
-	}
-#ifdef CONFIG_DEBUG_LOCKDEP
-	/*
-	 * Important for check_no_collision().
-	 */
-	else {
-		if (!debug_locks_off_graph_unlock())
-			return 0;
-
-		print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
-		dump_stack();
-		return 0;
-	}
-#endif
-
-	hlist_add_head_rcu(&chain->entry, hash_head);
-	debug_atomic_inc(chain_lookup_misses);
-	inc_chains();
-
-	return 1;
-}
-
 /*
  * Adds a dependency chain into chain hashtable. And must be called with
  * graph_lock held.
--
cgit v1.2.3
From 8ee10862476ef8b9e81e5b521205fd5c620b4ffb Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Tue, 2 Oct 2018 16:19:17 -0400
Subject: locking/lockdep: Eliminate redundant IRQs check in __lock_acquire()

The static __lock_acquire() function has only two callers:

 1) lock_acquire()
 2) reacquire_held_locks()

In lock_acquire(), raw_local_irq_save() is called beforehand, so IRQs
must already be disabled and the check:

	DEBUG_LOCKS_WARN_ON(!irqs_disabled())

is redundant in this case. Move the check to reacquire_held_locks()
instead, eliminating redundant code from the lock_acquire() path.

Signed-off-by: Waiman Long
Acked-by: Peter Zijlstra
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Will Deacon
Link: http://lkml.kernel.org/r/1538511560-10090-3-git-send-email-longman@redhat.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/lockdep.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'kernel/locking/lockdep.c')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index fa82d55279fe..a5d7db558928 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3193,6 +3193,10 @@ static int __lock_is_held(const struct lockdep_map *lock, int read);
 /*
  * This gets called for every mutex_lock*()/spin_lock*() operation.
  * We maintain the dependency maps and validate the locking attempt:
+ *
+ * The callers must make sure that IRQs are disabled before calling it,
+ * otherwise we could get an interrupt which would want to take locks,
+ * which would end up in lockdep again.
  */
 static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 			  int trylock, int read, int check, int hardirqs_off,
@@ -3210,14 +3214,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	if (unlikely(!debug_locks))
 		return 0;
 
-	/*
-	 * Lockdep should run with IRQs disabled, otherwise we could
-	 * get an interrupt which would want to take locks, which would
-	 * end up in lockdep and have you got a head-ache already?
-	 */
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
-		return 0;
-
 	if (!prove_locking || lock->key == &__lockdep_no_validate__)
 		check = 0;
 
@@ -3474,6 +3470,9 @@ static int reacquire_held_locks(struct task_struct *curr, unsigned int depth,
 {
 	struct held_lock *hlock;
 
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return 0;
+
 	for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) {
 		if (!__lock_acquire(hlock->instance,
 				    hlock_class(hlock)->subclass,
--
cgit v1.2.3
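The check can move because the hot caller already establishes the invariant
itself: lock_acquire() disables IRQs with raw_local_irq_save() before it
enters __lock_acquire(), so only the rarely used reacquire_held_locks() path
still needs the assertion. A minimal, stand-alone C sketch of this
"assert in the cold caller, not in the hot path" pattern is shown below; the
names are hypothetical and a plain flag stands in for the real IRQ state, so
this is an illustration of the idea, not the kernel code.

#include <assert.h>
#include <stdio.h>

static int irqs_off;                    /* stand-in for the real IRQ state */

/* Hot inner function: callers guarantee the precondition, so it is not
 * re-checked on every acquisition (the role played by __lock_acquire()). */
static int hot_acquire(void)
{
        return 1;                       /* dependency bookkeeping elided */
}

/* Fast caller: establishes the precondition itself, like lock_acquire()
 * calling raw_local_irq_save() before __lock_acquire(). */
static void fast_acquire(void)
{
        irqs_off = 1;
        hot_acquire();
        irqs_off = 0;
}

/* Rare caller: keeps the debug assertion, like reacquire_held_locks(). */
static int rare_reacquire(void)
{
        assert(irqs_off);               /* cold-path sanity check only */
        return hot_acquire();
}

int main(void)
{
        fast_acquire();
        irqs_off = 1;
        if (rare_reacquire())
                puts("reacquired with precondition held");
        return 0;
}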
From ce52a18db45842f5b992851a552bd7f6acb2241b Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Tue, 2 Oct 2018 16:19:18 -0400
Subject: locking/lockdep: Add a faster path in __lock_release()

When __lock_release() is called, the most likely unlock scenario is on
the innermost lock in the chain. In this case, we can skip some of the
checks and provide a faster path to completion.

Signed-off-by: Waiman Long
Acked-by: Peter Zijlstra
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Will Deacon
Link: http://lkml.kernel.org/r/1538511560-10090-4-git-send-email-longman@redhat.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/lockdep.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'kernel/locking/lockdep.c')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index a5d7db558928..511d30f88bce 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3626,6 +3626,13 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
 	curr->lockdep_depth = i;
 	curr->curr_chain_key = hlock->prev_chain_key;
 
+	/*
+	 * The most likely case is when the unlock is on the innermost
+	 * lock. In this case, we are done!
+	 */
+	if (i == depth-1)
+		return 1;
+
 	if (reacquire_held_locks(curr, depth, i + 1))
 		return 0;
 
 	/*
 	 * We had N bottles of beer on the wall, we drank one, but now
 	 * there's not N-1 bottles of beer left on the wall...
 	 */
-	if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
-		return 0;
+	DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth-1);
 
-	return 1;
+	/*
+	 * Since reacquire_held_locks() would have called check_chain_key()
+	 * indirectly via __lock_acquire(), we don't need to do it again
+	 * on return.
+	 */
+	return 0;
 }
 
 static int __lock_is_held(const struct lockdep_map *lock, int read)
--
cgit v1.2.3
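The fast path relies on locks almost always being released in LIFO order:
dropping the innermost entry of the held-lock stack needs no fix-up, and
only an out-of-order release has to rebuild the entries above it. The
following self-contained C sketch illustrates that shape with a hypothetical
integer stack rather than lockdep's real held_lock array:

#include <stdio.h>

#define MAX_DEPTH 48

static int held[MAX_DEPTH];             /* stack of held lock ids */
static int depth;

static void acquire(int id)
{
        held[depth++] = id;
}

static int release(int id)
{
        int i, j, old_depth = depth;

        for (i = old_depth - 1; i >= 0; i--)
                if (held[i] == id)
                        break;
        if (i < 0)
                return 0;               /* lock was not held at all */

        depth = i;                      /* drop the matching entry */

        /* Fast path: the innermost lock was released, nothing to redo. */
        if (i == old_depth - 1)
                return 1;

        /* Slow path: re-append everything that sat above the released
         * lock, the moral equivalent of reacquire_held_locks(). */
        for (j = i + 1; j < old_depth; j++)
                held[depth++] = held[j];
        return 1;
}

int main(void)
{
        acquire(1);
        acquire(2);
        acquire(3);
        release(3);                     /* innermost: fast path */
        release(1);                     /* out of order: slow path */
        printf("depth=%d top=%d\n", depth, held[depth - 1]);
        return 0;
}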
From 8ca2b56cd7da98fc8f8d787bb706b9d6c8674a3b Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Wed, 3 Oct 2018 13:07:18 -0400
Subject: locking/lockdep: Make class->ops a percpu counter and move it under CONFIG_DEBUG_LOCKDEP=y

A sizable portion of the CPU cycles spent in __lock_acquire() is used
up by the atomic increment of the class->ops stat counter. By taking it
out of the lock_class structure and changing it to a per-cpu
per-lock-class counter, we can reduce the amount of cacheline contention
on the class structure when multiple CPUs are trying to acquire locks of
the same class simultaneously.

To limit the increase in memory consumption because of the percpu nature
of that counter, it is now put back under the CONFIG_DEBUG_LOCKDEP
config option, so the memory consumption increase will only occur if
CONFIG_DEBUG_LOCKDEP is defined. The lock_class structure, however, is
reduced in size by 16 bytes on 64-bit archs after ops removal and a
minor restructuring of the fields.

This patch also fixes a bug in the increment code, as the counter is of
the 'unsigned long' type but atomic_inc() was used to increment it.

Signed-off-by: Waiman Long
Acked-by: Peter Zijlstra
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Will Deacon
Link: http://lkml.kernel.org/r/d66681f3-8781-9793-1dcf-2436a284550b@redhat.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/lockdep.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'kernel/locking/lockdep.c')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 511d30f88bce..a0f83058d6aa 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -139,7 +139,7 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
  * get freed - this significantly simplifies the debugging code.
  */
 unsigned long nr_lock_classes;
-static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
 
 static inline struct lock_class *hlock_class(struct held_lock *hlock)
 {
@@ -436,6 +436,7 @@ unsigned int max_lockdep_depth;
  * Various lockdep statistics:
  */
 DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
+DEFINE_PER_CPU(unsigned long [MAX_LOCKDEP_KEYS], lock_class_ops);
 #endif
 
 /*
@@ -1392,7 +1393,9 @@ static void print_lock_class_header(struct lock_class *class, int depth)
 
 	printk("%*s->", depth, "");
 	print_lock_name(class);
-	printk(KERN_CONT " ops: %lu", class->ops);
+#ifdef CONFIG_DEBUG_LOCKDEP
+	printk(KERN_CONT " ops: %lu", debug_class_ops_read(class));
+#endif
 	printk(KERN_CONT " {\n");
 
 	for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
@@ -3227,7 +3230,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		if (!class)
 			return 0;
 	}
-	atomic_inc((atomic_t *)&class->ops);
+
+	debug_class_ops_inc(class);
+
 	if (very_verbose(class)) {
 		printk("\nacquire class [%px] %s", class->key, class->name);
 		if (class->name_version > 1)
--
cgit v1.2.3

From 4766ab5677a2842834f9bc4a21587256a811531c Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Fri, 12 Oct 2018 17:42:27 -0400
Subject: locking/lockdep: Remove duplicated 'lock_class_ops' percpu array

Remove the duplicated 'lock_class_ops' percpu array that is not used
anywhere.

Signed-off-by: Waiman Long
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Will Deacon
Fixes: 8ca2b56cd7da ("locking/lockdep: Make class->ops a percpu counter and move it under CONFIG_DEBUG_LOCKDEP=y")
Link: http://lkml.kernel.org/r/1539380547-16726-1-git-send-email-longman@redhat.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/lockdep.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel/locking/lockdep.c')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index a0f83058d6aa..8a732c856624 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -436,7 +436,6 @@ unsigned int max_lockdep_depth;
  * Various lockdep statistics:
  */
 DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
-DEFINE_PER_CPU(unsigned long [MAX_LOCKDEP_KEYS], lock_class_ops);
 #endif
 
 /*
--
cgit v1.2.3
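The debug_class_ops_inc()/debug_class_ops_read() helpers used in the diff
above are defined outside this file, but the underlying technique, replacing
one shared atomic counter with per-CPU counters that are summed only on the
rare read side, can be sketched in ordinary user-space C. The sketch below is
an analogue under stated assumptions (hypothetical names, a thread index
passed in by hand as a stand-in for a CPU id), not the lockdep implementation:

#include <stdio.h>

#define MAX_CLASSES  8192               /* stand-in for MAX_LOCKDEP_KEYS */
#define MAX_CPUS     64

/* One counter row per CPU: increments touch only a locally owned
 * cacheline instead of bouncing one shared line between CPUs. */
static unsigned long class_ops[MAX_CPUS][MAX_CLASSES];

static inline void class_ops_inc(int cpu, int class_idx)
{
        class_ops[cpu][class_idx]++;    /* plain add, no lock prefix */
}

/* The read side pays the cost of summing all rows, but it only runs
 * when statistics are printed, mirroring print_lock_class_header(). */
static unsigned long class_ops_read(int class_idx)
{
        unsigned long sum = 0;

        for (int cpu = 0; cpu < MAX_CPUS; cpu++)
                sum += class_ops[cpu][class_idx];
        return sum;
}

int main(void)
{
        class_ops_inc(0, 42);
        class_ops_inc(1, 42);
        printf("class 42 ops: %lu\n", class_ops_read(42));
        return 0;
}

The 'unsigned long' vs. atomic_inc() mismatch called out in the changelog
also disappears naturally in this scheme, since each slot is only ever
incremented by its owner and needs no atomic read-modify-write at all.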
From 9506a7425b094d2f1d9c877ed5a78f416669269b Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Thu, 18 Oct 2018 21:45:17 -0400
Subject: locking/lockdep: Fix debug_locks off performance problem

It was found that when debug_locks was turned off because of a problem
found by the lockdep code, the system performance could drop quite
significantly when the lock_stat code was also configured into the
kernel. For instance, parallel kernel build time on a 4-socket x86-64
server nearly doubled.

Further analysis traced the slowdown back to the frequent calls to
debug_locks_off() from the __lock_acquired() function, probably due to
some inconsistent lockdep states with debug_locks off. The
debug_locks_off() function did an unconditional atomic xchg to write a 0
value into debug_locks, which had already been set to 0. This led to
severe contention on the cacheline holding debug_locks. As debug_locks
is referenced in quite a few different places in the kernel, this
greatly slowed down system performance.

To prevent this thrashing of the debug_locks cacheline, lock_acquired()
and lock_contended() now check the state of debug_locks before
proceeding. The debug_locks_off() function is also modified to check
debug_locks before calling __debug_locks_off().

Signed-off-by: Waiman Long
Cc: Andrew Morton
Cc: Linus Torvalds
Cc: Paul E. McKenney
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Will Deacon
Link: http://lkml.kernel.org/r/1539913518-15598-1-git-send-email-longman@redhat.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/lockdep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/locking/lockdep.c')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index be76f476c63f..1efada2dd9dd 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4066,7 +4066,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
-	if (unlikely(!lock_stat))
+	if (unlikely(!lock_stat || !debug_locks))
 		return;
 
 	if (unlikely(current->lockdep_recursion))
@@ -4086,7 +4086,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
-	if (unlikely(!lock_stat))
+	if (unlikely(!lock_stat || !debug_locks))
 		return;
 
 	if (unlikely(current->lockdep_recursion))
--
cgit v1.2.3
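The fix is a form of the classic "test before test-and-set" optimization:
read the flag first and only perform the cacheline-dirtying read-modify-write
when it can still change something. A small self-contained sketch of that
pattern in user-space C11 (hypothetical names, not lib/debug_locks.c itself):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int debug_enabled = 1;    /* stand-in for debug_locks */

/* Turning debugging off with an unconditional exchange would take the
 * cacheline exclusive on every call, even when the flag is already 0,
 * and bounce it between CPUs under contention. */
static int debug_off(void)
{
        /* Cheap shared read first: if it is already off, do nothing. */
        if (!atomic_load_explicit(&debug_enabled, memory_order_relaxed))
                return 0;

        /* Only one caller wins the actual read-modify-write. */
        return atomic_exchange(&debug_enabled, 0);
}

int main(void)
{
        printf("first call:  %d\n", debug_off());   /* 1: turned it off */
        printf("second call: %d\n", debug_off());   /* 0: already off, no RMW */
        return 0;
}

The lock_contended()/lock_acquired() hunks apply the same idea one level up:
a plain read of debug_locks lets them bail out before doing any work that
could end up calling debug_locks_off() again.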