From cf0ca9fe5dd9e3693d935757a7b2fc50fc576554 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Apr 2008 00:54:32 -0700 Subject: mm: bdi: export BDI attributes in sysfs Provide a place in sysfs (/sys/class/bdi) for the backing_dev_info object. This allows us to see and set the various BDI specific variables. In particular this properly exposes the read-ahead window for all relevant users and /sys/block//queue/read_ahead_kb should be deprecated. With patient help from Kay Sievers and Greg KH [mszeredi@suse.cz] - split off NFS and FUSE changes into separate patches - document new sysfs attributes under Documentation/ABI - do bdi_class_init as a core_initcall, otherwise the "default" BDI won't be initialized - remove bdi_init_fmt macro, it's not used very much [akpm@linux-foundation.org: fix ia64 warning] Signed-off-by: Peter Zijlstra Cc: Kay Sievers Acked-by: Greg KH Cc: Trond Myklebust Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/backing-dev.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) (limited to 'mm/backing-dev.c') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index e8644b1e5527..847eabe4824c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -4,12 +4,129 @@ #include #include #include +#include +#include + + +static struct class *bdi_class; + +static ssize_t read_ahead_kb_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + char *end; + unsigned long read_ahead_kb; + ssize_t ret = -EINVAL; + + read_ahead_kb = simple_strtoul(buf, &end, 10); + if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { + bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10); + ret = count; + } + return ret; +} + +#define K(pages) ((pages) << (PAGE_SHIFT - 10)) + +#define BDI_SHOW(name, expr) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *page) \ +{ \ + struct backing_dev_info *bdi = dev_get_drvdata(dev); \ + \ + return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr); \ +} + +BDI_SHOW(read_ahead_kb, K(bdi->ra_pages)) + +BDI_SHOW(reclaimable_kb, K(bdi_stat(bdi, BDI_RECLAIMABLE))) +BDI_SHOW(writeback_kb, K(bdi_stat(bdi, BDI_WRITEBACK))) + +static inline unsigned long get_dirty(struct backing_dev_info *bdi, int i) +{ + unsigned long thresh[3]; + + get_dirty_limits(&thresh[0], &thresh[1], &thresh[2], bdi); + + return thresh[i]; +} + +BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1))) +BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2))) + +#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) + +static struct device_attribute bdi_dev_attrs[] = { + __ATTR_RW(read_ahead_kb), + __ATTR_RO(reclaimable_kb), + __ATTR_RO(writeback_kb), + __ATTR_RO(dirty_kb), + __ATTR_RO(bdi_dirty_kb), + __ATTR_NULL, +}; + +static __init int bdi_class_init(void) +{ + bdi_class = class_create(THIS_MODULE, "bdi"); + bdi_class->dev_attrs = bdi_dev_attrs; + return 0; +} + +core_initcall(bdi_class_init); + +int bdi_register(struct backing_dev_info *bdi, struct device *parent, + const char *fmt, ...) +{ + char *name; + va_list args; + int ret = 0; + struct device *dev; + + va_start(args, fmt); + name = kvasprintf(GFP_KERNEL, fmt, args); + va_end(args); + + if (!name) + return -ENOMEM; + + dev = device_create(bdi_class, parent, MKDEV(0, 0), name); + if (IS_ERR(dev)) { + ret = PTR_ERR(dev); + goto exit; + } + + bdi->dev = dev; + dev_set_drvdata(bdi->dev, bdi); + +exit: + kfree(name); + return ret; +} +EXPORT_SYMBOL(bdi_register); + +int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev) +{ + return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev)); +} +EXPORT_SYMBOL(bdi_register_dev); + +void bdi_unregister(struct backing_dev_info *bdi) +{ + if (bdi->dev) { + device_unregister(bdi->dev); + bdi->dev = NULL; + } +} +EXPORT_SYMBOL(bdi_unregister); int bdi_init(struct backing_dev_info *bdi) { int i; int err; + bdi->dev = NULL; + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); if (err) @@ -33,6 +150,8 @@ void bdi_destroy(struct backing_dev_info *bdi) { int i; + bdi_unregister(bdi); + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); -- cgit v1.2.3 From 189d3c4a94ef19fca2a71a6a336e9fda900e25e7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Apr 2008 00:54:35 -0700 Subject: mm: bdi: allow setting a minimum for the bdi dirty limit Under normal circumstances each device is given a part of the total write-back cache that relates to its current avg writeout speed in relation to the other devices. min_ratio - allows one to assign a minimum portion of the write-back cache to a particular device. This is useful in situations where you might want to provide a minimum QoS. (One request for this feature came from flash based storage people who wanted to avoid writing out at all costs - they of course needed some pdflush hacks as well) max_ratio - allows one to assign a maximum portion of the dirty limit to a particular device. This is useful in situations where you want to avoid one device taking all or most of the write-back cache. Eg. an NFS mount that is prone to get stuck, or a FUSE mount which you don't trust to play fair. Add "min_ratio" to /sys/class/bdi. This indicates the minimum percentage of the global dirty threshold allocated to this bdi. [mszeredi@suse.cz] - fix parsing in min_ratio_store() - document new sysfs attribute Signed-off-by: Peter Zijlstra Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/backing-dev.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'mm/backing-dev.c') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 847eabe4824c..4967fb176e53 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -55,6 +55,24 @@ static inline unsigned long get_dirty(struct backing_dev_info *bdi, int i) BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1))) BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2))) +static ssize_t min_ratio_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + char *end; + unsigned int ratio; + ssize_t ret = -EINVAL; + + ratio = simple_strtoul(buf, &end, 10); + if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { + ret = bdi_set_min_ratio(bdi, ratio); + if (!ret) + ret = count; + } + return ret; +} +BDI_SHOW(min_ratio, bdi->min_ratio) + #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) static struct device_attribute bdi_dev_attrs[] = { @@ -63,6 +81,7 @@ static struct device_attribute bdi_dev_attrs[] = { __ATTR_RO(writeback_kb), __ATTR_RO(dirty_kb), __ATTR_RO(bdi_dirty_kb), + __ATTR_RW(min_ratio), __ATTR_NULL, }; @@ -127,6 +146,8 @@ int bdi_init(struct backing_dev_info *bdi) bdi->dev = NULL; + bdi->min_ratio = 0; + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); if (err) -- cgit v1.2.3 From a42dde04152750426cc620fd277e80fffae2f65a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Apr 2008 00:54:36 -0700 Subject: mm: bdi: allow setting a maximum for the bdi dirty limit Add "max_ratio" to /sys/class/bdi. This indicates the maximum percentage of the global dirty threshold allocated to this bdi. [mszeredi@suse.cz] - fix parsing in max_ratio_store(). - export bdi_set_max_ratio() to modules - limit bdi_dirty with bdi->max_ratio - document new sysfs attribute Signed-off-by: Peter Zijlstra Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/backing-dev.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'mm/backing-dev.c') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 4967fb176e53..08361b6aad50 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -73,6 +73,24 @@ static ssize_t min_ratio_store(struct device *dev, } BDI_SHOW(min_ratio, bdi->min_ratio) +static ssize_t max_ratio_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + char *end; + unsigned int ratio; + ssize_t ret = -EINVAL; + + ratio = simple_strtoul(buf, &end, 10); + if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { + ret = bdi_set_max_ratio(bdi, ratio); + if (!ret) + ret = count; + } + return ret; +} +BDI_SHOW(max_ratio, bdi->max_ratio) + #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) static struct device_attribute bdi_dev_attrs[] = { @@ -82,6 +100,7 @@ static struct device_attribute bdi_dev_attrs[] = { __ATTR_RO(dirty_kb), __ATTR_RO(bdi_dirty_kb), __ATTR_RW(min_ratio), + __ATTR_RW(max_ratio), __ATTR_NULL, }; @@ -147,6 +166,8 @@ int bdi_init(struct backing_dev_info *bdi) bdi->dev = NULL; bdi->min_ratio = 0; + bdi->max_ratio = 100; + bdi->max_prop_frac = PROP_FRAC_BASE; for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); -- cgit v1.2.3 From 76f1418b485da2707531178e517bbb5cf06b3c76 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:36 -0700 Subject: mm: bdi: move statistics to debugfs Move BDI statistics to debugfs: /sys/kernel/debug/bdi//stats Use postcore_initcall() to initialize the sysfs class and debugfs, because debugfs is initialized in core_initcall(). Update descriptions in ABI documentation. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/backing-dev.c | 98 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 20 deletions(-) (limited to 'mm/backing-dev.c') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 08361b6aad50..7c4f9e097095 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -10,6 +10,80 @@ static struct class *bdi_class; +#ifdef CONFIG_DEBUG_FS +#include +#include + +static struct dentry *bdi_debug_root; + +static void bdi_debug_init(void) +{ + bdi_debug_root = debugfs_create_dir("bdi", NULL); +} + +static int bdi_debug_stats_show(struct seq_file *m, void *v) +{ + struct backing_dev_info *bdi = m->private; + long background_thresh; + long dirty_thresh; + long bdi_thresh; + + get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); + +#define K(x) ((x) << (PAGE_SHIFT - 10)) + seq_printf(m, + "BdiWriteback: %8lu kB\n" + "BdiReclaimable: %8lu kB\n" + "BdiDirtyThresh: %8lu kB\n" + "DirtyThresh: %8lu kB\n" + "BackgroundThresh: %8lu kB\n", + (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), + (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)), + K(bdi_thresh), + K(dirty_thresh), + K(background_thresh)); +#undef K + + return 0; +} + +static int bdi_debug_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, bdi_debug_stats_show, inode->i_private); +} + +static const struct file_operations bdi_debug_stats_fops = { + .open = bdi_debug_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void bdi_debug_register(struct backing_dev_info *bdi, const char *name) +{ + bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root); + bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir, + bdi, &bdi_debug_stats_fops); +} + +static void bdi_debug_unregister(struct backing_dev_info *bdi) +{ + debugfs_remove(bdi->debug_stats); + debugfs_remove(bdi->debug_dir); +} +#else +static inline void bdi_debug_init(void) +{ +} +static inline void bdi_debug_register(struct backing_dev_info *bdi, + const char *name) +{ +} +static inline void bdi_debug_unregister(struct backing_dev_info *bdi) +{ +} +#endif + static ssize_t read_ahead_kb_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -40,21 +114,6 @@ static ssize_t name##_show(struct device *dev, \ BDI_SHOW(read_ahead_kb, K(bdi->ra_pages)) -BDI_SHOW(reclaimable_kb, K(bdi_stat(bdi, BDI_RECLAIMABLE))) -BDI_SHOW(writeback_kb, K(bdi_stat(bdi, BDI_WRITEBACK))) - -static inline unsigned long get_dirty(struct backing_dev_info *bdi, int i) -{ - unsigned long thresh[3]; - - get_dirty_limits(&thresh[0], &thresh[1], &thresh[2], bdi); - - return thresh[i]; -} - -BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1))) -BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2))) - static ssize_t min_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -95,10 +154,6 @@ BDI_SHOW(max_ratio, bdi->max_ratio) static struct device_attribute bdi_dev_attrs[] = { __ATTR_RW(read_ahead_kb), - __ATTR_RO(reclaimable_kb), - __ATTR_RO(writeback_kb), - __ATTR_RO(dirty_kb), - __ATTR_RO(bdi_dirty_kb), __ATTR_RW(min_ratio), __ATTR_RW(max_ratio), __ATTR_NULL, @@ -108,10 +163,11 @@ static __init int bdi_class_init(void) { bdi_class = class_create(THIS_MODULE, "bdi"); bdi_class->dev_attrs = bdi_dev_attrs; + bdi_debug_init(); return 0; } -core_initcall(bdi_class_init); +postcore_initcall(bdi_class_init); int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...) @@ -136,6 +192,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, bdi->dev = dev; dev_set_drvdata(bdi->dev, bdi); + bdi_debug_register(bdi, name); exit: kfree(name); @@ -152,6 +209,7 @@ EXPORT_SYMBOL(bdi_register_dev); void bdi_unregister(struct backing_dev_info *bdi) { if (bdi->dev) { + bdi_debug_unregister(bdi); device_unregister(bdi->dev); bdi->dev = NULL; } -- cgit v1.2.3