From 1bf70d08cc3b55abd1763e6dff5855cb8dd8318b Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Tue, 4 Mar 2025 15:52:33 +0530 Subject: block: introduce a dedicated lock for protecting queue elevator updates A queue's elevator can be updated either when modifying nr_hw_queues or through the sysfs scheduler attribute. Currently, elevator switching/ updating is protected using q->sysfs_lock, but this has led to lockdep splats[1] due to inconsistent lock ordering between q->sysfs_lock and the freeze-lock in multiple block layer call sites. As the scope of q->sysfs_lock is not well-defined, its (mis)use has resulted in numerous lockdep warnings. To address this, introduce a new q->elevator_lock, dedicated specifically for protecting elevator switches/updates. And we'd now use this new q->elevator_lock instead of q->sysfs_lock for protecting elevator switches/updates. While at it, make elv_iosched_load_module() a static function, as it is only called from elv_iosched_store(). Also, remove redundant parameters from elv_iosched_load_module() function signature. [1] https://lore.kernel.org/all/67637e70.050a0220.3157ee.000c.GAE@google.com/ Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250304102551.2533767-5-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 248416ecd01c..31b1b635c710 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -560,6 +560,14 @@ struct request_queue { struct blk_flush_queue *fq; struct list_head flush_list; + /* + * Protects against I/O scheduler switching, specifically when + * updating q->elevator. To ensure proper locking order during + * an elevator update, first freeze the queue, then acquire + * ->elevator_lock. + */ + struct mutex elevator_lock; + struct mutex sysfs_lock; struct mutex limits_lock; -- cgit v1.2.3 From 3efe7571c3ae2b6481253a2616c2bb3fbadd503b Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Tue, 4 Mar 2025 15:52:34 +0530 Subject: block: protect nr_requests update using q->elevator_lock The sysfs attribute nr_requests could be simultaneously updated from elevator switch/update or nr_hw_queue update code path. The update to nr_requests for each of those code paths runs holding q->elevator_lock. So we should protect access to sysfs attribute nr_requests using q-> elevator_lock instead of q->sysfs_lock. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250304102551.2533767-6-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 31b1b635c710..3e66ad016a23 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,10 +561,12 @@ struct request_queue { struct list_head flush_list; /* - * Protects against I/O scheduler switching, specifically when - * updating q->elevator. To ensure proper locking order during - * an elevator update, first freeze the queue, then acquire - * ->elevator_lock. + * Protects against I/O scheduler switching, particularly when + * updating q->elevator. Since the elevator update code path may + * also modify q->nr_requests, this lock also protects the sysfs + * attribute nr_requests. + * To ensure proper locking order during an elevator update, first + * freeze the queue, then acquire ->elevator_lock. */ struct mutex elevator_lock; -- cgit v1.2.3 From 245618f8e45ff4f79327627b474b563da71c2c75 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Tue, 4 Mar 2025 15:52:35 +0530 Subject: block: protect wbt_lat_usec using q->elevator_lock The wbt latency and state could be updated while initializing the elevator or exiting the elevator. It could be also updated while configuring IO latency QoS parameters using cgroup. The elevator code path is now protected with q->elevator_lock. So we should protect the access to sysfs attribute wbt_lat_usec using q->elevator _lock instead of q->sysfs_lock. White we're at it, also protect ioc_qos_write(), which configures wbt parameters via cgroup, using q->elevator_lock. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250304102551.2533767-7-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3e66ad016a23..0ee3b5c9388e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -563,8 +563,8 @@ struct request_queue { /* * Protects against I/O scheduler switching, particularly when * updating q->elevator. Since the elevator update code path may - * also modify q->nr_requests, this lock also protects the sysfs - * attribute nr_requests. + * also modify q->nr_requests and wbt latency, this lock also + * protects the sysfs attributes nr_requests and wbt_lat_usec. * To ensure proper locking order during an elevator update, first * freeze the queue, then acquire ->elevator_lock. */ -- cgit v1.2.3 From 5e40f4452dc9a3fb44d13bb6bc7032f3911a2675 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Tue, 4 Mar 2025 15:52:36 +0530 Subject: block: protect read_ahead_kb using q->limits_lock The bdi->ra_pages could be updated under q->limits_lock because it's usually calculated from the queue limits by queue_limits_commit_update. So protect reading/writing the sysfs attribute read_ahead_kb using q->limits_lock instead of q->sysfs_lock. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250304102551.2533767-8-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0ee3b5c9388e..3bee1b4858b6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -571,6 +571,9 @@ struct request_queue { struct mutex elevator_lock; struct mutex sysfs_lock; + /* + * Protects queue limits and also sysfs attribute read_ahead_kb. + */ struct mutex limits_lock; /* -- cgit v1.2.3 From 5abba4cebec0a591ca7e7f55701e42cd5dc059af Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 6 Mar 2025 15:09:53 +0530 Subject: block: protect hctx attributes/params using q->elevator_lock Currently, hctx attributes (nr_tags, nr_reserved_tags, and cpu_list) are protected using `q->sysfs_lock`. However, these attributes can be updated in multiple scenarios: - During the driver's probe method. - When updating nr_hw_queues. - When writing to the sysfs attribute nr_requests, which can modify nr_tags. The nr_requests attribute is already protected using q->elevator_lock, but none of the update paths actually use q->sysfs_lock to protect hctx attributes. So to ensure proper synchronization, replace q->sysfs_lock with q->elevator_lock when reading hctx attributes through sysfs. Additionally, blk_mq_update_nr_hw_queues allocates and updates hctx. The allocation of hctx is protected using q->elevator_lock, however, updating hctx params happens without any protection, so safeguard hctx param update path by also using q->elevator_lock. Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250306093956.2818808-1-nilay@linux.ibm.com [axboe: wrap comment at 80 chars] Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3bee1b4858b6..dcf8fce15e23 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,12 +561,14 @@ struct request_queue { struct list_head flush_list; /* - * Protects against I/O scheduler switching, particularly when - * updating q->elevator. Since the elevator update code path may - * also modify q->nr_requests and wbt latency, this lock also - * protects the sysfs attributes nr_requests and wbt_lat_usec. - * To ensure proper locking order during an elevator update, first - * freeze the queue, then acquire ->elevator_lock. + * Protects against I/O scheduler switching, particularly when updating + * q->elevator. Since the elevator update code path may also modify q-> + * nr_requests and wbt latency, this lock also protects the sysfs attrs + * nr_requests and wbt_lat_usec. Additionally the nr_hw_queues update + * may modify hctx tags, reserved-tags and cpumask, so this lock also + * helps protect the hctx attrs. To ensure proper locking order during + * an elevator or nr_hw_queue update, first freeze the queue, then + * acquire ->elevator_lock. */ struct mutex elevator_lock; -- cgit v1.2.3 From a3996d11f3ab743e6cc4e3529ce9459c2cd27139 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 13 Mar 2025 17:21:50 +0530 Subject: block: protect debugfs attrs using elevator_lock instead of sysfs_lock Currently, the block debugfs attributes (tags, tags_bitmap, sched_tags, and sched_tags_bitmap) are protected using q->sysfs_lock. However, these attributes are updated in multiple scenarios: - During driver probe method - During an elevator switch/update - During an nr_hw_queues update - When writing to the sysfs attribute nr_requests All these update paths (except driver probe method, which doesn't require any protection) are already protected using q->elevator_lock. To ensure consistency and proper synchronization, replace q->sysfs_lock with q->elevator_lock for protecting these debugfs attributes. Signed-off-by: Nilay Shroff Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250313115235.3707600-2-nilay@linux.ibm.com [axboe: some commit message rewording/fixes] Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dcf8fce15e23..8d072042790e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -566,9 +566,9 @@ struct request_queue { * nr_requests and wbt latency, this lock also protects the sysfs attrs * nr_requests and wbt_lat_usec. Additionally the nr_hw_queues update * may modify hctx tags, reserved-tags and cpumask, so this lock also - * helps protect the hctx attrs. To ensure proper locking order during - * an elevator or nr_hw_queue update, first freeze the queue, then - * acquire ->elevator_lock. + * helps protect the hctx sysfs/debugfs attrs. To ensure proper locking + * order during an elevator or nr_hw_queue update, first freeze the + * queue, then acquire ->elevator_lock. */ struct mutex elevator_lock; -- cgit v1.2.3