Diffstat (limited to 'drivers')
-rw-r--r--  drivers/block/zram/zram_drv.c    999
-rw-r--r--  drivers/block/zram/zram_drv.h     15
-rw-r--r--  drivers/misc/Kconfig               2
-rw-r--r--  drivers/misc/vmw_balloon.c       105
-rw-r--r--  drivers/virtio/Kconfig             2
-rw-r--r--  drivers/virtio/virtio_balloon.c   64
6 files changed, 627 insertions(+), 560 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 5759823d6314..61d3e2c74901 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -12,8 +12,7 @@
*
*/
-#define KMSG_COMPONENT "zram"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zram: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
@@ -56,13 +55,10 @@ static size_t huge_class_size;
static const struct block_device_operations zram_devops;
-static void zram_free_page(struct zram *zram, size_t index);
-static int zram_read_from_zspool(struct zram *zram, struct page *page,
- u32 index);
-
+static void slot_free(struct zram *zram, u32 index);
#define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)
-static void zram_slot_lock_init(struct zram *zram, u32 index)
+static void slot_lock_init(struct zram *zram, u32 index)
{
static struct lock_class_key __key;
@@ -82,9 +78,9 @@ static void zram_slot_lock_init(struct zram *zram, u32 index)
* 4) Use TRY lock variant when in atomic context
* - must check return value and handle locking failures
*/
-static __must_check bool zram_slot_trylock(struct zram *zram, u32 index)
+static __must_check bool slot_trylock(struct zram *zram, u32 index)
{
- unsigned long *lock = &zram->table[index].flags;
+ unsigned long *lock = &zram->table[index].__lock;
if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
@@ -95,18 +91,18 @@ static __must_check bool zram_slot_trylock(struct zram *zram, u32 index)
return false;
}
-static void zram_slot_lock(struct zram *zram, u32 index)
+static void slot_lock(struct zram *zram, u32 index)
{
- unsigned long *lock = &zram->table[index].flags;
+ unsigned long *lock = &zram->table[index].__lock;
mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
lock_acquired(slot_dep_map(zram, index), _RET_IP_);
}
-static void zram_slot_unlock(struct zram *zram, u32 index)
+static void slot_unlock(struct zram *zram, u32 index)
{
- unsigned long *lock = &zram->table[index].flags;
+ unsigned long *lock = &zram->table[index].__lock;
mutex_release(slot_dep_map(zram, index), _RET_IP_);
clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
@@ -122,52 +118,80 @@ static inline struct zram *dev_to_zram(struct device *dev)
return (struct zram *)dev_to_disk(dev)->private_data;
}
-static unsigned long zram_get_handle(struct zram *zram, u32 index)
+static unsigned long get_slot_handle(struct zram *zram, u32 index)
{
return zram->table[index].handle;
}
-static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
+static void set_slot_handle(struct zram *zram, u32 index, unsigned long handle)
{
zram->table[index].handle = handle;
}
-static bool zram_test_flag(struct zram *zram, u32 index,
- enum zram_pageflags flag)
+static bool test_slot_flag(struct zram *zram, u32 index,
+ enum zram_pageflags flag)
{
- return zram->table[index].flags & BIT(flag);
+ return zram->table[index].attr.flags & BIT(flag);
}
-static void zram_set_flag(struct zram *zram, u32 index,
- enum zram_pageflags flag)
+static void set_slot_flag(struct zram *zram, u32 index,
+ enum zram_pageflags flag)
{
- zram->table[index].flags |= BIT(flag);
+ zram->table[index].attr.flags |= BIT(flag);
}
-static void zram_clear_flag(struct zram *zram, u32 index,
- enum zram_pageflags flag)
+static void clear_slot_flag(struct zram *zram, u32 index,
+ enum zram_pageflags flag)
{
- zram->table[index].flags &= ~BIT(flag);
+ zram->table[index].attr.flags &= ~BIT(flag);
}
-static size_t zram_get_obj_size(struct zram *zram, u32 index)
+static size_t get_slot_size(struct zram *zram, u32 index)
{
- return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
+ return zram->table[index].attr.flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}
-static void zram_set_obj_size(struct zram *zram,
- u32 index, size_t size)
+static void set_slot_size(struct zram *zram, u32 index, size_t size)
{
- unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
+ unsigned long flags = zram->table[index].attr.flags >> ZRAM_FLAG_SHIFT;
+
+ zram->table[index].attr.flags = (flags << ZRAM_FLAG_SHIFT) | size;
+}
- zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
+static inline bool slot_allocated(struct zram *zram, u32 index)
+{
+ return get_slot_size(zram, index) ||
+ test_slot_flag(zram, index, ZRAM_SAME) ||
+ test_slot_flag(zram, index, ZRAM_WB);
+}
+
+static inline void set_slot_comp_priority(struct zram *zram, u32 index,
+ u32 prio)
+{
+ prio &= ZRAM_COMP_PRIORITY_MASK;
+ /*
+ * Clear the previous priority value first, in case we recompress
+ * an already recompressed page further.
+ */
+ zram->table[index].attr.flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
+ ZRAM_COMP_PRIORITY_BIT1);
+ zram->table[index].attr.flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
}
-static inline bool zram_allocated(struct zram *zram, u32 index)
+static inline u32 get_slot_comp_priority(struct zram *zram, u32 index)
{
- return zram_get_obj_size(zram, index) ||
- zram_test_flag(zram, index, ZRAM_SAME) ||
- zram_test_flag(zram, index, ZRAM_WB);
+ u32 prio = zram->table[index].attr.flags >> ZRAM_COMP_PRIORITY_BIT1;
+
+ return prio & ZRAM_COMP_PRIORITY_MASK;
+}
+
+static void mark_slot_accessed(struct zram *zram, u32 index)
+{
+ clear_slot_flag(zram, index, ZRAM_IDLE);
+ clear_slot_flag(zram, index, ZRAM_PP_SLOT);
+#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
+ zram->table[index].attr.ac_time = (u32)ktime_get_boottime_seconds();
+#endif
}
static inline void update_used_max(struct zram *zram, const unsigned long pages)
@@ -204,34 +228,6 @@ static inline bool is_partial_io(struct bio_vec *bvec)
}
#endif
-static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
-{
- prio &= ZRAM_COMP_PRIORITY_MASK;
- /*
- * Clear previous priority value first, in case if we recompress
- * further an already recompressed page
- */
- zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
- ZRAM_COMP_PRIORITY_BIT1);
- zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
-}
-
-static inline u32 zram_get_priority(struct zram *zram, u32 index)
-{
- u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;
-
- return prio & ZRAM_COMP_PRIORITY_MASK;
-}
-
-static void zram_accessed(struct zram *zram, u32 index)
-{
- zram_clear_flag(zram, index, ZRAM_IDLE);
- zram_clear_flag(zram, index, ZRAM_PP_SLOT);
-#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
- zram->table[index].ac_time = ktime_get_boottime();
-#endif
-}
-
#if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
struct zram_pp_slot {
unsigned long index;
@@ -267,9 +263,9 @@ static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
{
list_del_init(&pps->entry);
- zram_slot_lock(zram, pps->index);
- zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
- zram_slot_unlock(zram, pps->index);
+ slot_lock(zram, pps->index);
+ clear_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
+ slot_unlock(zram, pps->index);
kfree(pps);
}
@@ -308,10 +304,10 @@ static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
INIT_LIST_HEAD(&pps->entry);
pps->index = index;
- bid = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
+ bid = get_slot_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
list_add(&pps->entry, &ctl->pp_buckets[bid]);
- zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
+ set_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
return true;
}
@@ -363,15 +359,14 @@ static bool page_same_filled(void *ptr, unsigned long *element)
return true;
}
-static ssize_t initstate_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t initstate_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
u32 val;
struct zram *zram = dev_to_zram(dev);
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
val = init_done(zram);
- up_read(&zram->init_lock);
return sysfs_emit(buf, "%u\n", val);
}
@@ -385,7 +380,8 @@ static ssize_t disksize_show(struct device *dev,
}
static ssize_t mem_limit_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+ struct device_attribute *attr, const char *buf,
+ size_t len)
{
u64 limit;
char *tmp;
@@ -395,15 +391,15 @@ static ssize_t mem_limit_store(struct device *dev,
if (buf == tmp) /* no chars parsed, invalid input */
return -EINVAL;
- down_write(&zram->init_lock);
+ guard(rwsem_write)(&zram->dev_lock);
zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
- up_write(&zram->init_lock);
return len;
}
static ssize_t mem_used_max_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+ struct device_attribute *attr,
+ const char *buf, size_t len)
{
int err;
unsigned long val;
@@ -413,12 +409,11 @@ static ssize_t mem_used_max_store(struct device *dev,
if (err || val != 0)
return -EINVAL;
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
if (init_done(zram)) {
atomic_long_set(&zram->stats.max_used_pages,
zs_get_total_pages(zram->mem_pool));
}
- up_read(&zram->init_lock);
return len;
}
@@ -441,67 +436,66 @@ static void mark_idle(struct zram *zram, ktime_t cutoff)
*
* And ZRAM_WB slots simply cannot be ZRAM_IDLE.
*/
- zram_slot_lock(zram, index);
- if (!zram_allocated(zram, index) ||
- zram_test_flag(zram, index, ZRAM_WB) ||
- zram_test_flag(zram, index, ZRAM_SAME)) {
- zram_slot_unlock(zram, index);
+ slot_lock(zram, index);
+ if (!slot_allocated(zram, index) ||
+ test_slot_flag(zram, index, ZRAM_WB) ||
+ test_slot_flag(zram, index, ZRAM_SAME)) {
+ slot_unlock(zram, index);
continue;
}
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
is_idle = !cutoff ||
- ktime_after(cutoff, zram->table[index].ac_time);
+ ktime_after(cutoff, zram->table[index].attr.ac_time);
#endif
if (is_idle)
- zram_set_flag(zram, index, ZRAM_IDLE);
+ set_slot_flag(zram, index, ZRAM_IDLE);
else
- zram_clear_flag(zram, index, ZRAM_IDLE);
- zram_slot_unlock(zram, index);
+ clear_slot_flag(zram, index, ZRAM_IDLE);
+ slot_unlock(zram, index);
}
}
-static ssize_t idle_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+static ssize_t idle_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
- ktime_t cutoff_time = 0;
- ssize_t rv = -EINVAL;
+ ktime_t cutoff = 0;
if (!sysfs_streq(buf, "all")) {
/*
* If it did not parse as 'all', try to treat it as an integer
* when we have memory tracking enabled.
*/
- u64 age_sec;
+ u32 age_sec;
- if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && !kstrtoull(buf, 0, &age_sec))
- cutoff_time = ktime_sub(ktime_get_boottime(),
- ns_to_ktime(age_sec * NSEC_PER_SEC));
+ if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) &&
+ !kstrtouint(buf, 0, &age_sec))
+ cutoff = ktime_sub((u32)ktime_get_boottime_seconds(),
+ age_sec);
else
- goto out;
+ return -EINVAL;
}
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
if (!init_done(zram))
- goto out_unlock;
+ return -EINVAL;
/*
- * A cutoff_time of 0 marks everything as idle, this is the
+ * A cutoff of 0 marks everything as idle; this is the
* "all" behavior.
*/
- mark_idle(zram, cutoff_time);
- rv = len;
-
-out_unlock:
- up_read(&zram->init_lock);
-out:
- return rv;
+ mark_idle(zram, cutoff);
+ return len;
}
#ifdef CONFIG_ZRAM_WRITEBACK
#define INVALID_BDEV_BLOCK (~0UL)
+static int read_from_zspool_raw(struct zram *zram, struct page *page,
+ u32 index);
+static int read_from_zspool(struct zram *zram, struct page *page, u32 index);
+
struct zram_wb_ctl {
/* idle list is accessed only by the writeback task, no concurrency */
struct list_head idle_reqs;
@@ -522,23 +516,86 @@ struct zram_wb_req {
struct list_head entry;
};
+struct zram_rb_req {
+ struct work_struct work;
+ struct zram *zram;
+ struct page *page;
+ /* The read bio for backing device */
+ struct bio *bio;
+ unsigned long blk_idx;
+ union {
+ /* The original bio to complete (async read) */
+ struct bio *parent;
+ /* error status (sync read) */
+ int error;
+ };
+ u32 index;
+};
+
+#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
+static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+ ssize_t ret;
+
+ guard(rwsem_read)(&zram->dev_lock);
+ ret = sysfs_emit(buf,
+ "%8llu %8llu %8llu\n",
+ FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
+ FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
+ FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
+
+ return ret;
+}
+
+static ssize_t writeback_compressed_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ bool val;
+
+ if (kstrtobool(buf, &val))
+ return -EINVAL;
+
+ guard(rwsem_write)(&zram->dev_lock);
+ if (init_done(zram))
+ return -EBUSY;
+
+ zram->wb_compressed = val;
+
+ return len;
+}
+
+static ssize_t writeback_compressed_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ bool val;
+ struct zram *zram = dev_to_zram(dev);
+
+ guard(rwsem_read)(&zram->dev_lock);
+ val = zram->wb_compressed;
+
+ return sysfs_emit(buf, "%d\n", val);
+}
+
static ssize_t writeback_limit_enable_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
u64 val;
- ssize_t ret = -EINVAL;
if (kstrtoull(buf, 10, &val))
- return ret;
+ return -EINVAL;
- down_write(&zram->init_lock);
+ guard(rwsem_write)(&zram->dev_lock);
zram->wb_limit_enable = val;
- up_write(&zram->init_lock);
- ret = len;
- return ret;
+ return len;
}
static ssize_t writeback_limit_enable_show(struct device *dev,
@@ -548,9 +605,8 @@ static ssize_t writeback_limit_enable_show(struct device *dev,
bool val;
struct zram *zram = dev_to_zram(dev);
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
val = zram->wb_limit_enable;
- up_read(&zram->init_lock);
return sysfs_emit(buf, "%d\n", val);
}
@@ -561,10 +617,9 @@ static ssize_t writeback_limit_store(struct device *dev,
{
struct zram *zram = dev_to_zram(dev);
u64 val;
- ssize_t ret = -EINVAL;
if (kstrtoull(buf, 10, &val))
- return ret;
+ return -EINVAL;
/*
* When the page size is greater than 4KB, if bd_wb_limit is set to
@@ -576,12 +631,10 @@ static ssize_t writeback_limit_store(struct device *dev,
*/
val = rounddown(val, PAGE_SIZE / 4096);
- down_write(&zram->init_lock);
+ guard(rwsem_write)(&zram->dev_lock);
zram->bd_wb_limit = val;
- up_write(&zram->init_lock);
- ret = len;
- return ret;
+ return len;
}
static ssize_t writeback_limit_show(struct device *dev,
@@ -590,9 +643,8 @@ static ssize_t writeback_limit_show(struct device *dev,
u64 val;
struct zram *zram = dev_to_zram(dev);
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
val = zram->bd_wb_limit;
- up_read(&zram->init_lock);
return sysfs_emit(buf, "%llu\n", val);
}
@@ -610,9 +662,8 @@ static ssize_t writeback_batch_size_store(struct device *dev,
if (!val)
return -EINVAL;
- down_write(&zram->init_lock);
+ guard(rwsem_write)(&zram->dev_lock);
zram->wb_batch_size = val;
- up_write(&zram->init_lock);
return len;
}
@@ -624,9 +675,8 @@ static ssize_t writeback_batch_size_show(struct device *dev,
u32 val;
struct zram *zram = dev_to_zram(dev);
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
val = zram->wb_batch_size;
- up_read(&zram->init_lock);
return sysfs_emit(buf, "%u\n", val);
}
@@ -646,37 +696,33 @@ static void reset_bdev(struct zram *zram)
}
static ssize_t backing_dev_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
struct file *file;
struct zram *zram = dev_to_zram(dev);
char *p;
ssize_t ret;
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
file = zram->backing_dev;
if (!file) {
memcpy(buf, "none\n", 5);
- up_read(&zram->init_lock);
return 5;
}
p = file_path(file, buf, PAGE_SIZE - 1);
- if (IS_ERR(p)) {
- ret = PTR_ERR(p);
- goto out;
- }
+ if (IS_ERR(p))
+ return PTR_ERR(p);
ret = strlen(p);
memmove(buf, p, ret);
buf[ret++] = '\n';
-out:
- up_read(&zram->init_lock);
return ret;
}
static ssize_t backing_dev_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+ struct device_attribute *attr, const char *buf,
+ size_t len)
{
char *file_name;
size_t sz;
@@ -691,7 +737,7 @@ static ssize_t backing_dev_store(struct device *dev,
if (!file_name)
return -ENOMEM;
- down_write(&zram->init_lock);
+ guard(rwsem_write)(&zram->dev_lock);
if (init_done(zram)) {
pr_info("Can't setup backing device for initialized device\n");
err = -EBUSY;
@@ -739,7 +785,6 @@ static ssize_t backing_dev_store(struct device *dev,
zram->backing_dev = backing_dev;
zram->bitmap = bitmap;
zram->nr_pages = nr_pages;
- up_write(&zram->init_lock);
pr_info("setup backing device %s\n", file_name);
kfree(file_name);
@@ -751,8 +796,6 @@ out:
if (backing_dev)
filp_close(backing_dev, NULL);
- up_write(&zram->init_lock);
-
kfree(file_name);
return err;
@@ -780,18 +823,6 @@ static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
atomic64_dec(&zram->stats.bd_count);
}
-static void read_from_bdev_async(struct zram *zram, struct page *page,
- unsigned long entry, struct bio *parent)
-{
- struct bio *bio;
-
- bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
- bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
- __bio_add_page(bio, page, PAGE_SIZE, 0);
- bio_chain(bio, parent);
- submit_bio(bio);
-}
-
static void release_wb_req(struct zram_wb_req *req)
{
__free_page(req->page);
@@ -870,7 +901,7 @@ release_wb_ctl:
static void zram_account_writeback_rollback(struct zram *zram)
{
- lockdep_assert_held_read(&zram->init_lock);
+ lockdep_assert_held_write(&zram->dev_lock);
if (zram->wb_limit_enable)
zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12);
@@ -878,7 +909,7 @@ static void zram_account_writeback_rollback(struct zram *zram)
static void zram_account_writeback_submit(struct zram *zram)
{
- lockdep_assert_held_read(&zram->init_lock);
+ lockdep_assert_held_write(&zram->dev_lock);
if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
@@ -886,8 +917,9 @@ static void zram_account_writeback_submit(struct zram *zram)
static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
{
- u32 index = req->pps->index;
- int err;
+ u32 size, index = req->pps->index;
+ int err, prio;
+ bool huge;
err = blk_status_to_errno(req->bio.bi_status);
if (err) {
@@ -901,7 +933,7 @@ static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
}
atomic64_inc(&zram->stats.bd_writes);
- zram_slot_lock(zram, index);
+ slot_lock(zram, index);
/*
* We release slot lock during writeback so slot can change under us:
* slot_free() or slot_free() and zram_write_page(). In both cases
@@ -909,18 +941,36 @@ static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
* set ZRAM_PP_SLOT on such slots until current post-processing
* finishes.
*/
- if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) {
+ if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) {
zram_release_bdev_block(zram, req->blk_idx);
goto out;
}
- zram_free_page(zram, index);
- zram_set_flag(zram, index, ZRAM_WB);
- zram_set_handle(zram, index, req->blk_idx);
+ if (zram->wb_compressed) {
+ /*
+ * ZRAM_WB slots get freed; we need to preserve the data required
+ * for read decompression.
+ */
+ size = get_slot_size(zram, index);
+ prio = get_slot_comp_priority(zram, index);
+ huge = test_slot_flag(zram, index, ZRAM_HUGE);
+ }
+
+ slot_free(zram, index);
+ set_slot_flag(zram, index, ZRAM_WB);
+ set_slot_handle(zram, index, req->blk_idx);
+
+ if (zram->wb_compressed) {
+ if (huge)
+ set_slot_flag(zram, index, ZRAM_HUGE);
+ set_slot_size(zram, index, size);
+ set_slot_comp_priority(zram, index, prio);
+ }
+
atomic64_inc(&zram->stats.pages_stored);
out:
- zram_slot_unlock(zram, index);
+ slot_unlock(zram, index);
return 0;
}
@@ -1041,18 +1091,22 @@ static int zram_writeback_slots(struct zram *zram,
}
index = pps->index;
- zram_slot_lock(zram, index);
+ slot_lock(zram, index);
/*
* scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so
* slots can change in the meantime. If slots are accessed or
* freed they lose ZRAM_PP_SLOT flag and hence we don't
* post-process them.
*/
- if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
+ if (!test_slot_flag(zram, index, ZRAM_PP_SLOT))
goto next;
- if (zram_read_from_zspool(zram, req->page, index))
+ if (zram->wb_compressed)
+ err = read_from_zspool_raw(zram, req->page, index);
+ else
+ err = read_from_zspool(zram, req->page, index);
+ if (err)
goto next;
- zram_slot_unlock(zram, index);
+ slot_unlock(zram, index);
/*
* From now on pp-slot is owned by the req, remove it from
@@ -1074,7 +1128,7 @@ static int zram_writeback_slots(struct zram *zram,
continue;
next:
- zram_slot_unlock(zram, index);
+ slot_unlock(zram, index);
release_pp_slot(zram, pps);
}
@@ -1167,27 +1221,27 @@ static int scan_slots_for_writeback(struct zram *zram, u32 mode,
while (index < hi) {
bool ok = true;
- zram_slot_lock(zram, index);
- if (!zram_allocated(zram, index))
+ slot_lock(zram, index);
+ if (!slot_allocated(zram, index))
goto next;
- if (zram_test_flag(zram, index, ZRAM_WB) ||
- zram_test_flag(zram, index, ZRAM_SAME))
+ if (test_slot_flag(zram, index, ZRAM_WB) ||
+ test_slot_flag(zram, index, ZRAM_SAME))
goto next;
if (mode & IDLE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_IDLE))
+ !test_slot_flag(zram, index, ZRAM_IDLE))
goto next;
if (mode & HUGE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_HUGE))
+ !test_slot_flag(zram, index, ZRAM_HUGE))
goto next;
if (mode & INCOMPRESSIBLE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+ !test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE))
goto next;
ok = place_pp_slot(zram, ctl, index);
next:
- zram_slot_unlock(zram, index);
+ slot_unlock(zram, index);
if (!ok)
break;
index++;
@@ -1209,33 +1263,21 @@ static ssize_t writeback_store(struct device *dev,
ssize_t ret = len;
int err, mode = 0;
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
+ guard(rwsem_write)(&zram->dev_lock);
+ if (!init_done(zram))
return -EINVAL;
- }
-
- /* Do not permit concurrent post-processing actions. */
- if (atomic_xchg(&zram->pp_in_progress, 1)) {
- up_read(&zram->init_lock);
- return -EAGAIN;
- }
- if (!zram->backing_dev) {
- ret = -ENODEV;
- goto release_init_lock;
- }
+ if (!zram->backing_dev)
+ return -ENODEV;
pp_ctl = init_pp_ctl();
- if (!pp_ctl) {
- ret = -ENOMEM;
- goto release_init_lock;
- }
+ if (!pp_ctl)
+ return -ENOMEM;
wb_ctl = init_wb_ctl(zram);
if (!wb_ctl) {
ret = -ENOMEM;
- goto release_init_lock;
+ goto out;
}
args = skip_spaces(buf);
@@ -1259,7 +1301,7 @@ static ssize_t writeback_store(struct device *dev,
err = parse_mode(param, &mode);
if (err) {
ret = err;
- goto release_init_lock;
+ goto out;
}
scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
@@ -1270,7 +1312,7 @@ static ssize_t writeback_store(struct device *dev,
err = parse_mode(val, &mode);
if (err) {
ret = err;
- goto release_init_lock;
+ goto out;
}
scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
@@ -1281,7 +1323,7 @@ static ssize_t writeback_store(struct device *dev,
err = parse_page_index(val, nr_pages, &lo, &hi);
if (err) {
ret = err;
- goto release_init_lock;
+ goto out;
}
scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
@@ -1292,7 +1334,7 @@ static ssize_t writeback_store(struct device *dev,
err = parse_page_indexes(val, nr_pages, &lo, &hi);
if (err) {
ret = err;
- goto release_init_lock;
+ goto out;
}
scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
@@ -1304,33 +1346,147 @@ static ssize_t writeback_store(struct device *dev,
if (err)
ret = err;
-release_init_lock:
+out:
release_pp_ctl(zram, pp_ctl);
release_wb_ctl(wb_ctl);
- atomic_set(&zram->pp_in_progress, 0);
- up_read(&zram->init_lock);
return ret;
}
-struct zram_work {
- struct work_struct work;
- struct zram *zram;
- unsigned long entry;
- struct page *page;
- int error;
-};
+static int decompress_bdev_page(struct zram *zram, struct page *page, u32 index)
+{
+ struct zcomp_strm *zstrm;
+ unsigned int size;
+ int ret, prio;
+ void *src;
+
+ slot_lock(zram, index);
+ /* Since the slot was unlocked, we need to make sure it's still ZRAM_WB */
+ if (!test_slot_flag(zram, index, ZRAM_WB)) {
+ slot_unlock(zram, index);
+ /* We read some stale data, zero it out */
+ memset_page(page, 0, 0, PAGE_SIZE);
+ return -EIO;
+ }
+
+ if (test_slot_flag(zram, index, ZRAM_HUGE)) {
+ slot_unlock(zram, index);
+ return 0;
+ }
+
+ size = get_slot_size(zram, index);
+ prio = get_slot_comp_priority(zram, index);
+
+ zstrm = zcomp_stream_get(zram->comps[prio]);
+ src = kmap_local_page(page);
+ ret = zcomp_decompress(zram->comps[prio], zstrm, src, size,
+ zstrm->local_copy);
+ if (!ret)
+ copy_page(src, zstrm->local_copy);
+ kunmap_local(src);
+ zcomp_stream_put(zstrm);
+ slot_unlock(zram, index);
+
+ return ret;
+}
+
+static void zram_deferred_decompress(struct work_struct *w)
+{
+ struct zram_rb_req *req = container_of(w, struct zram_rb_req, work);
+ struct page *page = bio_first_page_all(req->bio);
+ struct zram *zram = req->zram;
+ u32 index = req->index;
+ int ret;
+
+ ret = decompress_bdev_page(zram, page, index);
+ if (ret)
+ req->parent->bi_status = BLK_STS_IOERR;
+
+ /* Decrement parent's ->remaining */
+ bio_endio(req->parent);
+ bio_put(req->bio);
+ kfree(req);
+}
-static void zram_sync_read(struct work_struct *work)
+static void zram_async_read_endio(struct bio *bio)
{
- struct zram_work *zw = container_of(work, struct zram_work, work);
+ struct zram_rb_req *req = bio->bi_private;
+ struct zram *zram = req->zram;
+
+ if (bio->bi_status) {
+ req->parent->bi_status = bio->bi_status;
+ bio_endio(req->parent);
+ bio_put(bio);
+ kfree(req);
+ return;
+ }
+
+ /*
+ * NOTE: zram_async_read_endio() is not exactly the right place for
+ * this. Ideally, we would do it after the ZRAM_WB check, but that
+ * requires using the wq path even on systems that don't enable
+ * compressed writeback, because we cannot take the slot lock in the
+ * current context.
+ *
+ * Keep the existing behavior for now.
+ */
+ if (!zram->wb_compressed) {
+ /* No decompression needed, complete the parent IO */
+ bio_endio(req->parent);
+ bio_put(bio);
+ kfree(req);
+ return;
+ }
+
+ /*
+ * zram decompression is sleepable, so we need to defer it to
+ * a preemptible context.
+ */
+ INIT_WORK(&req->work, zram_deferred_decompress);
+ queue_work(system_highpri_wq, &req->work);
+}
+
+static void read_from_bdev_async(struct zram *zram, struct page *page,
+ u32 index, unsigned long blk_idx,
+ struct bio *parent)
+{
+ struct zram_rb_req *req;
+ struct bio *bio;
+
+ req = kmalloc(sizeof(*req), GFP_NOIO);
+ if (!req)
+ return;
+
+ bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
+ if (!bio) {
+ kfree(req);
+ return;
+ }
+
+ req->zram = zram;
+ req->index = index;
+ req->blk_idx = blk_idx;
+ req->bio = bio;
+ req->parent = parent;
+
+ bio->bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
+ bio->bi_private = req;
+ bio->bi_end_io = zram_async_read_endio;
+
+ __bio_add_page(bio, page, PAGE_SIZE, 0);
+ bio_inc_remaining(parent);
+ submit_bio(bio);
+}
+
+static void zram_sync_read(struct work_struct *w)
+{
+ struct zram_rb_req *req = container_of(w, struct zram_rb_req, work);
struct bio_vec bv;
struct bio bio;
- bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
- bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
- __bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
- zw->error = submit_bio_wait(&bio);
+ bio_init(&bio, req->zram->bdev, &bv, 1, REQ_OP_READ);
+ bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
+ __bio_add_page(&bio, req->page, PAGE_SIZE, 0);
+ req->error = submit_bio_wait(&bio);
}
/*
@@ -1338,39 +1494,42 @@ static void zram_sync_read(struct work_struct *work)
* chained IO with parent IO in same context, it's a deadlock. To avoid that,
* use a worker thread context.
*/
-static int read_from_bdev_sync(struct zram *zram, struct page *page,
- unsigned long entry)
+static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index,
+ unsigned long blk_idx)
{
- struct zram_work work;
+ struct zram_rb_req req;
+
+ req.page = page;
+ req.zram = zram;
+ req.blk_idx = blk_idx;
- work.page = page;
- work.zram = zram;
- work.entry = entry;
+ INIT_WORK_ONSTACK(&req.work, zram_sync_read);
+ queue_work(system_dfl_wq, &req.work);
+ flush_work(&req.work);
+ destroy_work_on_stack(&req.work);
- INIT_WORK_ONSTACK(&work.work, zram_sync_read);
- queue_work(system_dfl_wq, &work.work);
- flush_work(&work.work);
- destroy_work_on_stack(&work.work);
+ if (req.error || !zram->wb_compressed)
+ return req.error;
- return work.error;
+ return decompress_bdev_page(zram, page, index);
}
-static int read_from_bdev(struct zram *zram, struct page *page,
- unsigned long entry, struct bio *parent)
+static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
+ unsigned long blk_idx, struct bio *parent)
{
atomic64_inc(&zram->stats.bd_reads);
if (!parent) {
if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
return -EIO;
- return read_from_bdev_sync(zram, page, entry);
+ return read_from_bdev_sync(zram, page, index, blk_idx);
}
- read_from_bdev_async(zram, page, entry, parent);
+ read_from_bdev_async(zram, page, index, blk_idx, parent);
return 0;
}
#else
static inline void reset_bdev(struct zram *zram) {};
-static int read_from_bdev(struct zram *zram, struct page *page,
- unsigned long entry, struct bio *parent)
+static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
+ unsigned long blk_idx, struct bio *parent)
{
return -EIO;
}
@@ -1401,15 +1560,13 @@ static ssize_t read_block_state(struct file *file, char __user *buf,
ssize_t index, written = 0;
struct zram *zram = file->private_data;
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
- struct timespec64 ts;
kbuf = kvmalloc(count, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
if (!init_done(zram)) {
- up_read(&zram->init_lock);
kvfree(kbuf);
return -EINVAL;
}
@@ -1417,35 +1574,32 @@ static ssize_t read_block_state(struct file *file, char __user *buf,
for (index = *ppos; index < nr_pages; index++) {
int copied;
- zram_slot_lock(zram, index);
- if (!zram_allocated(zram, index))
+ slot_lock(zram, index);
+ if (!slot_allocated(zram, index))
goto next;
- ts = ktime_to_timespec64(zram->table[index].ac_time);
copied = snprintf(kbuf + written, count,
- "%12zd %12lld.%06lu %c%c%c%c%c%c\n",
- index, (s64)ts.tv_sec,
- ts.tv_nsec / NSEC_PER_USEC,
- zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
- zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
- zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
- zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
- zram_get_priority(zram, index) ? 'r' : '.',
- zram_test_flag(zram, index,
+ "%12zd %12u.%06d %c%c%c%c%c%c\n",
+ index, zram->table[index].attr.ac_time, 0,
+ test_slot_flag(zram, index, ZRAM_SAME) ? 's' : '.',
+ test_slot_flag(zram, index, ZRAM_WB) ? 'w' : '.',
+ test_slot_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
+ test_slot_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
+ get_slot_comp_priority(zram, index) ? 'r' : '.',
+ test_slot_flag(zram, index,
ZRAM_INCOMPRESSIBLE) ? 'n' : '.');
if (count <= copied) {
- zram_slot_unlock(zram, index);
+ slot_unlock(zram, index);
break;
}
written += copied;
count -= copied;
next:
- zram_slot_unlock(zram, index);
+ slot_unlock(zram, index);
*ppos += 1;
}
- up_read(&zram->init_lock);
if (copy_to_user(buf, kbuf, written))
written = -EFAULT;
kvfree(kbuf);
@@ -1512,16 +1666,14 @@ static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
return -EINVAL;
}
- down_write(&zram->init_lock);
+ guard(rwsem_write)(&zram->dev_lock);
if (init_done(zram)) {
- up_write(&zram->init_lock);
kfree(compressor);
pr_info("Can't change algorithm for initialized device\n");
return -EBUSY;
}
comp_algorithm_set(zram, prio, compressor);
- up_write(&zram->init_lock);
return 0;
}
@@ -1642,9 +1794,8 @@ static ssize_t comp_algorithm_show(struct device *dev,
struct zram *zram = dev_to_zram(dev);
ssize_t sz;
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0);
- up_read(&zram->init_lock);
return sz;
}
@@ -1669,7 +1820,7 @@ static ssize_t recomp_algorithm_show(struct device *dev,
ssize_t sz = 0;
u32 prio;
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
if (!zram->comp_algs[prio])
continue;
@@ -1677,7 +1828,6 @@ static ssize_t recomp_algorithm_show(struct device *dev,
sz += sysfs_emit_at(buf, sz, "#%d: ", prio);
sz += zcomp_available_show(zram->comp_algs[prio], buf, sz);
}
- up_read(&zram->init_lock);
return sz;
}
@@ -1723,42 +1873,38 @@ static ssize_t recomp_algorithm_store(struct device *dev,
}
#endif
-static ssize_t compact_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+static ssize_t compact_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
+ if (!init_done(zram))
return -EINVAL;
- }
zs_compact(zram->mem_pool);
- up_read(&zram->init_lock);
return len;
}
-static ssize_t io_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t io_stat_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct zram *zram = dev_to_zram(dev);
ssize_t ret;
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
ret = sysfs_emit(buf,
"%8llu %8llu 0 %8llu\n",
(u64)atomic64_read(&zram->stats.failed_reads),
(u64)atomic64_read(&zram->stats.failed_writes),
(u64)atomic64_read(&zram->stats.notify_free));
- up_read(&zram->init_lock);
return ret;
}
-static ssize_t mm_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t mm_stat_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct zram *zram = dev_to_zram(dev);
struct zs_pool_stats pool_stats;
@@ -1768,7 +1914,7 @@ static ssize_t mm_stat_show(struct device *dev,
memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
if (init_done(zram)) {
mem_used = zs_get_total_pages(zram->mem_pool);
zs_pool_stats(zram->mem_pool, &pool_stats);
@@ -1788,55 +1934,26 @@ static ssize_t mm_stat_show(struct device *dev,
atomic_long_read(&pool_stats.pages_compacted),
(u64)atomic64_read(&zram->stats.huge_pages),
(u64)atomic64_read(&zram->stats.huge_pages_since));
- up_read(&zram->init_lock);
return ret;
}
-#ifdef CONFIG_ZRAM_WRITEBACK
-#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
-static ssize_t bd_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
- ssize_t ret;
-
- down_read(&zram->init_lock);
- ret = sysfs_emit(buf,
- "%8llu %8llu %8llu\n",
- FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
- FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
- FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
- up_read(&zram->init_lock);
-
- return ret;
-}
-#endif
-
static ssize_t debug_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
int version = 1;
struct zram *zram = dev_to_zram(dev);
ssize_t ret;
- down_read(&zram->init_lock);
+ guard(rwsem_read)(&zram->dev_lock);
ret = sysfs_emit(buf,
"version: %d\n0 %8llu\n",
version,
(u64)atomic64_read(&zram->stats.miss_free));
- up_read(&zram->init_lock);
return ret;
}
-static DEVICE_ATTR_RO(io_stat);
-static DEVICE_ATTR_RO(mm_stat);
-#ifdef CONFIG_ZRAM_WRITEBACK
-static DEVICE_ATTR_RO(bd_stat);
-#endif
-static DEVICE_ATTR_RO(debug_stat);
-
static void zram_meta_free(struct zram *zram, u64 disksize)
{
size_t num_pages = disksize >> PAGE_SHIFT;
@@ -1847,7 +1964,7 @@ static void zram_meta_free(struct zram *zram, u64 disksize)
/* Free all pages that are still in this zram device */
for (index = 0; index < num_pages; index++)
- zram_free_page(zram, index);
+ slot_free(zram, index);
zs_destroy_pool(zram->mem_pool);
vfree(zram->table);
@@ -1874,32 +1991,32 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
huge_class_size = zs_huge_class_size(zram->mem_pool);
for (index = 0; index < num_pages; index++)
- zram_slot_lock_init(zram, index);
+ slot_lock_init(zram, index);
return true;
}
-static void zram_free_page(struct zram *zram, size_t index)
+static void slot_free(struct zram *zram, u32 index)
{
unsigned long handle;
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
- zram->table[index].ac_time = 0;
+ zram->table[index].attr.ac_time = 0;
#endif
- zram_clear_flag(zram, index, ZRAM_IDLE);
- zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
- zram_clear_flag(zram, index, ZRAM_PP_SLOT);
- zram_set_priority(zram, index, 0);
+ clear_slot_flag(zram, index, ZRAM_IDLE);
+ clear_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
+ clear_slot_flag(zram, index, ZRAM_PP_SLOT);
+ set_slot_comp_priority(zram, index, 0);
- if (zram_test_flag(zram, index, ZRAM_HUGE)) {
- zram_clear_flag(zram, index, ZRAM_HUGE);
+ if (test_slot_flag(zram, index, ZRAM_HUGE)) {
+ clear_slot_flag(zram, index, ZRAM_HUGE);
atomic64_dec(&zram->stats.huge_pages);
}
- if (zram_test_flag(zram, index, ZRAM_WB)) {
- zram_clear_flag(zram, index, ZRAM_WB);
- zram_release_bdev_block(zram, zram_get_handle(zram, index));
+ if (test_slot_flag(zram, index, ZRAM_WB)) {
+ clear_slot_flag(zram, index, ZRAM_WB);
+ zram_release_bdev_block(zram, get_slot_handle(zram, index));
goto out;
}
@@ -1907,24 +2024,24 @@ static void zram_free_page(struct zram *zram, size_t index)
* No memory is allocated for same element filled pages.
* Simply clear same page flag.
*/
- if (zram_test_flag(zram, index, ZRAM_SAME)) {
- zram_clear_flag(zram, index, ZRAM_SAME);
+ if (test_slot_flag(zram, index, ZRAM_SAME)) {
+ clear_slot_flag(zram, index, ZRAM_SAME);
atomic64_dec(&zram->stats.same_pages);
goto out;
}
- handle = zram_get_handle(zram, index);
+ handle = get_slot_handle(zram, index);
if (!handle)
return;
zs_free(zram->mem_pool, handle);
- atomic64_sub(zram_get_obj_size(zram, index),
+ atomic64_sub(get_slot_size(zram, index),
&zram->stats.compr_data_size);
out:
atomic64_dec(&zram->stats.pages_stored);
- zram_set_handle(zram, index, 0);
- zram_set_obj_size(zram, index, 0);
+ set_slot_handle(zram, index, 0);
+ set_slot_size(zram, index, 0);
}
static int read_same_filled_page(struct zram *zram, struct page *page,
@@ -1933,7 +2050,7 @@ static int read_same_filled_page(struct zram *zram, struct page *page,
void *mem;
mem = kmap_local_page(page);
- zram_fill_page(mem, PAGE_SIZE, zram_get_handle(zram, index));
+ zram_fill_page(mem, PAGE_SIZE, get_slot_handle(zram, index));
kunmap_local(mem);
return 0;
}
@@ -1944,12 +2061,12 @@ static int read_incompressible_page(struct zram *zram, struct page *page,
unsigned long handle;
void *src, *dst;
- handle = zram_get_handle(zram, index);
- src = zs_obj_read_begin(zram->mem_pool, handle, NULL);
+ handle = get_slot_handle(zram, index);
+ src = zs_obj_read_begin(zram->mem_pool, handle, PAGE_SIZE, NULL);
dst = kmap_local_page(page);
copy_page(dst, src);
kunmap_local(dst);
- zs_obj_read_end(zram->mem_pool, handle, src);
+ zs_obj_read_end(zram->mem_pool, handle, PAGE_SIZE, src);
return 0;
}
@@ -1962,33 +2079,60 @@ static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
void *src, *dst;
int ret, prio;
- handle = zram_get_handle(zram, index);
- size = zram_get_obj_size(zram, index);
- prio = zram_get_priority(zram, index);
+ handle = get_slot_handle(zram, index);
+ size = get_slot_size(zram, index);
+ prio = get_slot_comp_priority(zram, index);
zstrm = zcomp_stream_get(zram->comps[prio]);
- src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy);
+ src = zs_obj_read_begin(zram->mem_pool, handle, size,
+ zstrm->local_copy);
dst = kmap_local_page(page);
ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
kunmap_local(dst);
- zs_obj_read_end(zram->mem_pool, handle, src);
+ zs_obj_read_end(zram->mem_pool, handle, size, src);
zcomp_stream_put(zstrm);
return ret;
}
+#if defined CONFIG_ZRAM_WRITEBACK
+static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index)
+{
+ struct zcomp_strm *zstrm;
+ unsigned long handle;
+ unsigned int size;
+ void *src;
+
+ handle = get_slot_handle(zram, index);
+ size = get_slot_size(zram, index);
+
+ /*
+ * We need to get a stream just for the ->local_copy buffer, in
+ * case the object spans two physical pages. No decompression
+ * takes place here, as we read raw compressed data.
+ */
+ zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
+ src = zs_obj_read_begin(zram->mem_pool, handle, size,
+ zstrm->local_copy);
+ memcpy_to_page(page, 0, src, size);
+ zs_obj_read_end(zram->mem_pool, handle, size, src);
+ zcomp_stream_put(zstrm);
+
+ return 0;
+}
+#endif
+
/*
* Reads (decompresses if needed) a page from zspool (zsmalloc).
* Corresponding ZRAM slot should be locked.
*/
-static int zram_read_from_zspool(struct zram *zram, struct page *page,
- u32 index)
+static int read_from_zspool(struct zram *zram, struct page *page, u32 index)
{
- if (zram_test_flag(zram, index, ZRAM_SAME) ||
- !zram_get_handle(zram, index))
+ if (test_slot_flag(zram, index, ZRAM_SAME) ||
+ !get_slot_handle(zram, index))
return read_same_filled_page(zram, page, index);
- if (!zram_test_flag(zram, index, ZRAM_HUGE))
+ if (!test_slot_flag(zram, index, ZRAM_HUGE))
return read_compressed_page(zram, page, index);
else
return read_incompressible_page(zram, page, index);
@@ -1999,20 +2143,20 @@ static int zram_read_page(struct zram *zram, struct page *page, u32 index,
{
int ret;
- zram_slot_lock(zram, index);
- if (!zram_test_flag(zram, index, ZRAM_WB)) {
+ slot_lock(zram, index);
+ if (!test_slot_flag(zram, index, ZRAM_WB)) {
/* Slot should be locked throughout the function call */
- ret = zram_read_from_zspool(zram, page, index);
- zram_slot_unlock(zram, index);
+ ret = read_from_zspool(zram, page, index);
+ slot_unlock(zram, index);
} else {
- unsigned long blk_idx = zram_get_handle(zram, index);
+ unsigned long blk_idx = get_slot_handle(zram, index);
/*
* The slot should be unlocked before reading from the backing
* device.
*/
- zram_slot_unlock(zram, index);
- ret = read_from_bdev(zram, page, blk_idx, parent);
+ slot_unlock(zram, index);
+ ret = read_from_bdev(zram, page, index, blk_idx, parent);
}
/* Should NEVER happen. Return bio error if it does. */
@@ -2052,11 +2196,11 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
static int write_same_filled_page(struct zram *zram, unsigned long fill,
u32 index)
{
- zram_slot_lock(zram, index);
- zram_free_page(zram, index);
- zram_set_flag(zram, index, ZRAM_SAME);
- zram_set_handle(zram, index, fill);
- zram_slot_unlock(zram, index);
+ slot_lock(zram, index);
+ slot_free(zram, index);
+ set_slot_flag(zram, index, ZRAM_SAME);
+ set_slot_handle(zram, index, fill);
+ slot_unlock(zram, index);
atomic64_inc(&zram->stats.same_pages);
atomic64_inc(&zram->stats.pages_stored);
@@ -2090,12 +2234,12 @@ static int write_incompressible_page(struct zram *zram, struct page *page,
zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
kunmap_local(src);
- zram_slot_lock(zram, index);
- zram_free_page(zram, index);
- zram_set_flag(zram, index, ZRAM_HUGE);
- zram_set_handle(zram, index, handle);
- zram_set_obj_size(zram, index, PAGE_SIZE);
- zram_slot_unlock(zram, index);
+ slot_lock(zram, index);
+ slot_free(zram, index);
+ set_slot_flag(zram, index, ZRAM_HUGE);
+ set_slot_handle(zram, index, handle);
+ set_slot_size(zram, index, PAGE_SIZE);
+ slot_unlock(zram, index);
atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
atomic64_inc(&zram->stats.huge_pages);
@@ -2155,11 +2299,11 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index)
zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
zcomp_stream_put(zstrm);
- zram_slot_lock(zram, index);
- zram_free_page(zram, index);
- zram_set_handle(zram, index, handle);
- zram_set_obj_size(zram, index, comp_len);
- zram_slot_unlock(zram, index);
+ slot_lock(zram, index);
+ slot_free(zram, index);
+ set_slot_handle(zram, index, handle);
+ set_slot_size(zram, index, comp_len);
+ slot_unlock(zram, index);
/* Update stats */
atomic64_inc(&zram->stats.pages_stored);
@@ -2210,30 +2354,30 @@ static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max,
for (index = 0; index < nr_pages; index++) {
bool ok = true;
- zram_slot_lock(zram, index);
- if (!zram_allocated(zram, index))
+ slot_lock(zram, index);
+ if (!slot_allocated(zram, index))
goto next;
if (mode & RECOMPRESS_IDLE &&
- !zram_test_flag(zram, index, ZRAM_IDLE))
+ !test_slot_flag(zram, index, ZRAM_IDLE))
goto next;
if (mode & RECOMPRESS_HUGE &&
- !zram_test_flag(zram, index, ZRAM_HUGE))
+ !test_slot_flag(zram, index, ZRAM_HUGE))
goto next;
- if (zram_test_flag(zram, index, ZRAM_WB) ||
- zram_test_flag(zram, index, ZRAM_SAME) ||
- zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+ if (test_slot_flag(zram, index, ZRAM_WB) ||
+ test_slot_flag(zram, index, ZRAM_SAME) ||
+ test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE))
goto next;
/* Already compressed with same or higher priority */
- if (zram_get_priority(zram, index) + 1 >= prio_max)
+ if (get_slot_comp_priority(zram, index) + 1 >= prio_max)
goto next;
ok = place_pp_slot(zram, ctl, index);
next:
- zram_slot_unlock(zram, index);
+ slot_unlock(zram, index);
if (!ok)
break;
}
@@ -2262,18 +2406,18 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page,
void *src;
int ret = 0;
- handle_old = zram_get_handle(zram, index);
+ handle_old = get_slot_handle(zram, index);
if (!handle_old)
return -EINVAL;
- comp_len_old = zram_get_obj_size(zram, index);
+ comp_len_old = get_slot_size(zram, index);
/*
* Do not recompress objects that are already "small enough".
*/
if (comp_len_old < threshold)
return 0;
- ret = zram_read_from_zspool(zram, page, index);
+ ret = read_from_zspool(zram, page, index);
if (ret)
return ret;
@@ -2282,11 +2426,11 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page,
* we don't preserve IDLE flag and don't incorrectly pick this entry
* for different post-processing type (e.g. writeback).
*/
- zram_clear_flag(zram, index, ZRAM_IDLE);
+ clear_slot_flag(zram, index, ZRAM_IDLE);
class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
- prio = max(prio, zram_get_priority(zram, index) + 1);
+ prio = max(prio, get_slot_comp_priority(zram, index) + 1);
/*
* Recompression slots scan should not select slots that are
* already compressed with a higher priority algorithm, but
@@ -2353,7 +2497,7 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page,
*/
if (prio < zram->num_active_comps)
return 0;
- zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
+ set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
return 0;
}
@@ -2362,14 +2506,15 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page,
* avoid direct reclaim. Allocation error is not fatal since
* we still have the old object in the mem_pool.
*
- * XXX: technically, the node we really want here is the node that holds
- * the original compressed data. But that would require us to modify
- * zsmalloc API to return this information. For now, we will make do with
- * the node of the page allocated for recompression.
+ * XXX: technically, the node we really want here is the node that
+ * holds the original compressed data. But that would require us to
+ * modify zsmalloc API to return this information. For now, we will
+ * make do with the node of the page allocated for recompression.
*/
handle_new = zs_malloc(zram->mem_pool, comp_len_new,
GFP_NOIO | __GFP_NOWARN |
- __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
+ __GFP_HIGHMEM | __GFP_MOVABLE,
+ page_to_nid(page));
if (IS_ERR_VALUE(handle_new)) {
zcomp_stream_put(zstrm);
return PTR_ERR((void *)handle_new);
@@ -2378,10 +2523,10 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page,
zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
zcomp_stream_put(zstrm);
- zram_free_page(zram, index);
- zram_set_handle(zram, index, handle_new);
- zram_set_obj_size(zram, index, comp_len_new);
- zram_set_priority(zram, index, prio);
+ slot_free(zram, index);
+ set_slot_handle(zram, index, handle_new);
+ set_slot_size(zram, index, comp_len_new);
+ set_slot_comp_priority(zram, index, prio);
atomic64_add(comp_len_new, &zram->stats.compr_data_size);
atomic64_inc(&zram->stats.pages_stored);
@@ -2466,17 +2611,9 @@ static ssize_t recompress_store(struct device *dev,
if (threshold >= huge_class_size)
return -EINVAL;
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- ret = -EINVAL;
- goto release_init_lock;
- }
-
- /* Do not permit concurrent post-processing actions. */
- if (atomic_xchg(&zram->pp_in_progress, 1)) {
- up_read(&zram->init_lock);
- return -EAGAIN;
- }
+ guard(rwsem_write)(&zram->dev_lock);
+ if (!init_done(zram))
+ return -EINVAL;
if (algo) {
bool found = false;
@@ -2494,26 +2631,26 @@ static ssize_t recompress_store(struct device *dev,
if (!found) {
ret = -EINVAL;
- goto release_init_lock;
+ goto out;
}
}
prio_max = min(prio_max, (u32)zram->num_active_comps);
if (prio >= prio_max) {
ret = -EINVAL;
- goto release_init_lock;
+ goto out;
}
page = alloc_page(GFP_KERNEL);
if (!page) {
ret = -ENOMEM;
- goto release_init_lock;
+ goto out;
}
ctl = init_pp_ctl();
if (!ctl) {
ret = -ENOMEM;
- goto release_init_lock;
+ goto out;
}
scan_slots_for_recompress(zram, mode, prio_max, ctl);
@@ -2525,15 +2662,15 @@ static ssize_t recompress_store(struct device *dev,
if (!num_recomp_pages)
break;
- zram_slot_lock(zram, pps->index);
- if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
+ slot_lock(zram, pps->index);
+ if (!test_slot_flag(zram, pps->index, ZRAM_PP_SLOT))
goto next;
err = recompress_slot(zram, pps->index, page,
&num_recomp_pages, threshold,
prio, prio_max);
next:
- zram_slot_unlock(zram, pps->index);
+ slot_unlock(zram, pps->index);
release_pp_slot(zram, pps);
if (err) {
@@ -2544,12 +2681,10 @@ next:
cond_resched();
}
-release_init_lock:
+out:
if (page)
__free_page(page);
release_pp_ctl(zram, ctl);
- atomic_set(&zram->pp_in_progress, 0);
- up_read(&zram->init_lock);
return ret;
}
#endif
@@ -2580,9 +2715,9 @@ static void zram_bio_discard(struct zram *zram, struct bio *bio)
}
while (n >= PAGE_SIZE) {
- zram_slot_lock(zram, index);
- zram_free_page(zram, index);
- zram_slot_unlock(zram, index);
+ slot_lock(zram, index);
+ slot_free(zram, index);
+ slot_unlock(zram, index);
atomic64_inc(&zram->stats.notify_free);
index++;
n -= PAGE_SIZE;
@@ -2611,9 +2746,9 @@ static void zram_bio_read(struct zram *zram, struct bio *bio)
}
flush_dcache_page(bv.bv_page);
- zram_slot_lock(zram, index);
- zram_accessed(zram, index);
- zram_slot_unlock(zram, index);
+ slot_lock(zram, index);
+ mark_slot_accessed(zram, index);
+ slot_unlock(zram, index);
bio_advance_iter_single(bio, &iter, bv.bv_len);
} while (iter.bi_size);
@@ -2641,9 +2776,9 @@ static void zram_bio_write(struct zram *zram, struct bio *bio)
break;
}
- zram_slot_lock(zram, index);
- zram_accessed(zram, index);
- zram_slot_unlock(zram, index);
+ slot_lock(zram, index);
+ mark_slot_accessed(zram, index);
+ slot_unlock(zram, index);
bio_advance_iter_single(bio, &iter, bv.bv_len);
} while (iter.bi_size);
@@ -2684,13 +2819,13 @@ static void zram_slot_free_notify(struct block_device *bdev,
zram = bdev->bd_disk->private_data;
atomic64_inc(&zram->stats.notify_free);
- if (!zram_slot_trylock(zram, index)) {
+ if (!slot_trylock(zram, index)) {
atomic64_inc(&zram->stats.miss_free);
return;
}
- zram_free_page(zram, index);
- zram_slot_unlock(zram, index);
+ slot_free(zram, index);
+ slot_unlock(zram, index);
}
static void zram_comp_params_reset(struct zram *zram)
@@ -2728,7 +2863,7 @@ static void zram_destroy_comps(struct zram *zram)
static void zram_reset_device(struct zram *zram)
{
- down_write(&zram->init_lock);
+ guard(rwsem_write)(&zram->dev_lock);
zram->limit_pages = 0;
@@ -2740,15 +2875,13 @@ static void zram_reset_device(struct zram *zram)
zram->disksize = 0;
zram_destroy_comps(zram);
memset(&zram->stats, 0, sizeof(zram->stats));
- atomic_set(&zram->pp_in_progress, 0);
reset_bdev(zram);
comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
- up_write(&zram->init_lock);
}
-static ssize_t disksize_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+static ssize_t disksize_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
{
u64 disksize;
struct zcomp *comp;
@@ -2760,18 +2893,15 @@ static ssize_t disksize_store(struct device *dev,
if (!disksize)
return -EINVAL;
- down_write(&zram->init_lock);
+ guard(rwsem_write)(&zram->dev_lock);
if (init_done(zram)) {
pr_info("Cannot change disksize for initialized device\n");
- err = -EBUSY;
- goto out_unlock;
+ return -EBUSY;
}
disksize = PAGE_ALIGN(disksize);
- if (!zram_meta_alloc(zram, disksize)) {
- err = -ENOMEM;
- goto out_unlock;
- }
+ if (!zram_meta_alloc(zram, disksize))
+ return -ENOMEM;
for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
if (!zram->comp_algs[prio])
@@ -2791,15 +2921,12 @@ static ssize_t disksize_store(struct device *dev,
}
zram->disksize = disksize;
set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
- up_write(&zram->init_lock);
return len;
out_free_comps:
zram_destroy_comps(zram);
zram_meta_free(zram, disksize);
-out_unlock:
- up_write(&zram->init_lock);
return err;
}
@@ -2862,6 +2989,9 @@ static const struct block_device_operations zram_devops = {
.owner = THIS_MODULE
};
+static DEVICE_ATTR_RO(io_stat);
+static DEVICE_ATTR_RO(mm_stat);
+static DEVICE_ATTR_RO(debug_stat);
static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
@@ -2871,11 +3001,13 @@ static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
+static DEVICE_ATTR_RO(bd_stat);
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
static DEVICE_ATTR_RW(writeback_batch_size);
+static DEVICE_ATTR_RW(writeback_compressed);
#endif
#ifdef CONFIG_ZRAM_MULTI_COMP
static DEVICE_ATTR_RW(recomp_algorithm);
@@ -2893,17 +3025,16 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_idle.attr,
&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
+ &dev_attr_bd_stat.attr,
&dev_attr_backing_dev.attr,
&dev_attr_writeback.attr,
&dev_attr_writeback_limit.attr,
&dev_attr_writeback_limit_enable.attr,
&dev_attr_writeback_batch_size.attr,
+ &dev_attr_writeback_compressed.attr,
#endif
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
-#ifdef CONFIG_ZRAM_WRITEBACK
- &dev_attr_bd_stat.attr,
-#endif
&dev_attr_debug_stat.attr,
#ifdef CONFIG_ZRAM_MULTI_COMP
&dev_attr_recomp_algorithm.attr,
@@ -2957,9 +3088,10 @@ static int zram_add(void)
goto out_free_dev;
device_id = ret;
- init_rwsem(&zram->init_lock);
+ init_rwsem(&zram->dev_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
zram->wb_batch_size = 32;
+ zram->wb_compressed = false;
#endif
/* gendisk structure */
@@ -2978,7 +3110,6 @@ static int zram_add(void)
zram->disk->fops = &zram_devops;
zram->disk->private_data = zram;
snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
- atomic_set(&zram->pp_in_progress, 0);
zram_comp_params_reset(zram);
comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
@@ -3139,7 +3270,7 @@ static int __init zram_init(void)
struct zram_table_entry zram_te;
int ret;
- BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8);
+ BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.attr.flags) * 8);
ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
zcomp_cpu_up_prepare, zcomp_cpu_dead);
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index c6d94501376c..515a72d9c06f 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -65,10 +65,15 @@ enum zram_pageflags {
*/
struct zram_table_entry {
unsigned long handle;
- unsigned long flags;
+ union {
+ unsigned long __lock;
+ struct attr {
+ u32 flags;
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
- ktime_t ac_time;
+ u32 ac_time;
#endif
+ } attr;
+ };
struct lockdep_map dep_map;
};
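
The union above is the heart of the slot rework: __lock exposes the whole word to the bit-lock helpers (slot_lock()/slot_trylock() in the earlier hunks), while attr.flags and, with CONFIG_ZRAM_TRACK_ENTRY_ACTIME, attr.ac_time view the same storage as two u32 halves. A sketch of the layout this assumes on a 64-bit little-endian build; the assertions are illustrative (they would sit next to the existing BUILD_BUG_ON() in zram_init()), not part of the patch:

	/*
	 * bits  0..31  attr.flags   (zram_pageflags + ZRAM_ENTRY_LOCK)
	 * bits 32..63  attr.ac_time (CONFIG_ZRAM_TRACK_ENTRY_ACTIME only)
	 */
	BUILD_BUG_ON(offsetof(struct zram_table_entry, attr.flags) !=
		     offsetof(struct zram_table_entry, __lock));
	BUILD_BUG_ON(sizeof(((struct zram_table_entry *)0)->attr) >
		     sizeof(unsigned long));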
@@ -106,8 +111,8 @@ struct zram {
struct zcomp *comps[ZRAM_MAX_COMPS];
struct zcomp_params params[ZRAM_MAX_COMPS];
struct gendisk *disk;
- /* Prevent concurrent execution of device init */
- struct rw_semaphore init_lock;
+ /* Protects device state; held in exclusive (write) or shared (read) mode */
+ struct rw_semaphore dev_lock;
/*
* the number of pages zram can consume for storing compressed data
*/
@@ -128,6 +133,7 @@ struct zram {
#ifdef CONFIG_ZRAM_WRITEBACK
struct file *backing_dev;
bool wb_limit_enable;
+ bool wb_compressed;
u32 wb_batch_size;
u64 bd_wb_limit;
struct block_device *bdev;
@@ -137,6 +143,5 @@ struct zram {
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
struct dentry *debugfs_dir;
#endif
- atomic_t pp_in_progress;
};
#endif
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index d7d41b054b98..5cc79d1517af 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -410,7 +410,7 @@ config DS1682
config VMWARE_BALLOON
tristate "VMware Balloon Driver"
depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST
- select MEMORY_BALLOON
+ select BALLOON
help
This is VMware physical memory management driver which acts
like a "balloon" that can be inflated to reclaim physical pages
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index cc1d18b3df5c..216a16395968 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -29,7 +29,7 @@
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/balloon_compaction.h>
+#include <linux/balloon.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>
@@ -354,11 +354,16 @@ struct vmballoon {
/**
* @huge_pages - list of the inflated 2MB pages.
*
- * Protected by @b_dev_info.pages_lock .
+ * Protected by @huge_pages_lock.
*/
struct list_head huge_pages;
/**
+ * @huge_pages_lock: lock for the list of inflated 2MB pages.
+ */
+ spinlock_t huge_pages_lock;
+
+ /**
* @vmci_doorbell.
*
* Protected by @conf_sem.
@@ -987,7 +992,6 @@ static void vmballoon_enqueue_page_list(struct vmballoon *b,
unsigned int *n_pages,
enum vmballoon_page_size_type page_size)
{
- unsigned long flags;
struct page *page;
if (page_size == VMW_BALLOON_4K_PAGE) {
@@ -995,9 +999,9 @@ static void vmballoon_enqueue_page_list(struct vmballoon *b,
} else {
/*
* Keep the huge pages in a local list which is not available
- * for the balloon compaction mechanism.
+ * to the balloon page migration code.
*/
- spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+ spin_lock(&b->huge_pages_lock);
list_for_each_entry(page, pages, lru) {
vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE);
@@ -1006,7 +1010,7 @@ static void vmballoon_enqueue_page_list(struct vmballoon *b,
list_splice_init(pages, &b->huge_pages);
__count_vm_events(BALLOON_INFLATE, *n_pages *
vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
- spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
+ spin_unlock(&b->huge_pages_lock);
}
*n_pages = 0;
@@ -1033,7 +1037,6 @@ static void vmballoon_dequeue_page_list(struct vmballoon *b,
{
struct page *page, *tmp;
unsigned int i = 0;
- unsigned long flags;
/* In the case of 4k pages, use the compaction infrastructure */
if (page_size == VMW_BALLOON_4K_PAGE) {
@@ -1043,7 +1046,7 @@ static void vmballoon_dequeue_page_list(struct vmballoon *b,
}
/* 2MB pages */
- spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+ spin_lock(&b->huge_pages_lock);
list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) {
vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE);
@@ -1054,7 +1057,7 @@ static void vmballoon_dequeue_page_list(struct vmballoon *b,
__count_vm_events(BALLOON_DEFLATE,
i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
- spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
+ spin_unlock(&b->huge_pages_lock);
*n_pages = i;
}
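
Note the implicit downgrade that comes with the new lock: b_dev_info.pages_lock had to be taken with spin_lock_irqsave() because the common balloon core takes it with interrupts disabled, whereas huge_pages is private to this driver and only touched from process context, so a plain spin_lock() suffices and the flags plumbing disappears from both paths. In condensed form:

	/* driver-private list, process context only: no irqsave needed */
	spin_lock(&b->huge_pages_lock);
	list_splice_init(pages, &b->huge_pages);
	spin_unlock(&b->huge_pages_lock);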
@@ -1716,7 +1719,7 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
#endif /* CONFIG_DEBUG_FS */
-#ifdef CONFIG_BALLOON_COMPACTION
+#ifdef CONFIG_BALLOON_MIGRATION
/**
* vmballoon_migratepage() - migrates a balloon page.
* @b_dev_info: balloon device information descriptor.
@@ -1724,18 +1727,17 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
* @page: a ballooned page that should be migrated.
* @mode: migration mode, ignored.
*
- * This function is really open-coded, but that is according to the interface
- * that balloon_compaction provides.
- *
* Return: zero on success, -EAGAIN when migration cannot be performed
- * momentarily, and -EBUSY if migration failed and should be retried
- * with that specific page.
+ * momentarily, -EBUSY if migration failed and should be retried
+ * with that specific page, and -ENOENT when deflating @page
+ * succeeded but inflating @newpage failed, effectively deflating
+ * the balloon.
*/
static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
struct page *newpage, struct page *page,
enum migrate_mode mode)
{
- unsigned long status, flags;
+ unsigned long status;
struct vmballoon *b;
int ret = 0;
@@ -1773,14 +1775,6 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
goto out_unlock;
}
- /*
- * The page is isolated, so it is safe to delete it without holding
- * @pages_lock . We keep holding @comm_lock since we will need it in a
- * second.
- */
- balloon_page_finalize(page);
- put_page(page);
-
/* Inflate */
vmballoon_add_page(b, 0, newpage);
status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
@@ -1799,60 +1793,21 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
* change.
*/
atomic64_dec(&b->size);
- } else {
/*
- * Success. Take a reference for the page, and we will add it to
- * the list after acquiring the lock.
+ * Tell the core that we're deflating the old page and don't
+ * need the new page.
*/
- get_page(newpage);
- }
-
- /* Update the balloon list under the @pages_lock */
- spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
-
- /*
- * On inflation success, we already took a reference for the @newpage.
- * If we succeed just insert it to the list and update the statistics
- * under the lock.
- */
- if (status == VMW_BALLOON_SUCCESS) {
- balloon_page_insert(&b->b_dev_info, newpage);
- __count_vm_event(BALLOON_MIGRATE);
+ ret = -ENOENT;
}
-
- /*
- * We deflated successfully, so regardless to the inflation success, we
- * need to reduce the number of isolated_pages.
- */
- b->b_dev_info.isolated_pages--;
- spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
-
out_unlock:
up_read(&b->conf_sem);
return ret;
}
-
-/**
- * vmballoon_compaction_init() - initialized compaction for the balloon.
- *
- * @b: pointer to the balloon.
- *
- * If during the initialization a failure occurred, this function does not
- * perform cleanup. The caller must call vmballoon_compaction_deinit() in this
- * case.
- *
- * Return: zero on success or error code on failure.
- */
-static __init void vmballoon_compaction_init(struct vmballoon *b)
-{
- b->b_dev_info.migratepage = vmballoon_migratepage;
-}
-
-#else /* CONFIG_BALLOON_COMPACTION */
-static inline void vmballoon_compaction_init(struct vmballoon *b)
-{
-}
-#endif /* CONFIG_BALLOON_COMPACTION */
+#else /* CONFIG_BALLOON_MIGRATION */
+int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
+ struct page *newpage, struct page *page,
+ enum migrate_mode mode);
+#endif /* CONFIG_BALLOON_MIGRATION */
static int __init vmballoon_init(void)
{
@@ -1871,14 +1826,12 @@ static int __init vmballoon_init(void)
if (error)
return error;
- /*
- * Initialization of compaction must be done after the call to
- * balloon_devinfo_init() .
- */
balloon_devinfo_init(&balloon.b_dev_info);
- vmballoon_compaction_init(&balloon);
+ if (IS_ENABLED(CONFIG_BALLOON_MIGRATION))
+ balloon.b_dev_info.migratepage = vmballoon_migratepage;
INIT_LIST_HEAD(&balloon.huge_pages);
+ spin_lock_init(&balloon.huge_pages_lock);
spin_lock_init(&balloon.comm_lock);
init_rwsem(&balloon.conf_sem);
balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
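
The IS_ENABLED() conversion above pairs with the bare prototype left in the #else branch earlier in this file: when CONFIG_BALLOON_MIGRATION=n, the if (IS_ENABLED(...)) branch is dead code, the compiler discards the reference to vmballoon_migratepage(), and the never-defined symbol never reaches the linker, while any unguarded use would fail the build. The idiom in miniature (all names are illustrative):

	#ifdef CONFIG_MY_FEATURE
	static int my_handler(struct my_dev *dev)
	{
		/* real implementation */
		return 0;
	}
	#else
	/* declared but deliberately never defined */
	int my_handler(struct my_dev *dev);
	#endif

	static void my_init(struct my_dev *dev)
	{
		if (IS_ENABLED(CONFIG_MY_FEATURE))
			dev->handler = my_handler;	/* reference folded away when =n */
	}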
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 6db5235a7693..ce5bc0d9ea28 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -112,7 +112,7 @@ config VIRTIO_PMEM
config VIRTIO_BALLOON
tristate "Virtio balloon driver"
depends on VIRTIO
- select MEMORY_BALLOON
+ select BALLOON
select PAGE_REPORTING
help
This driver supports increasing and decreasing the amount
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 74fe59f5a78c..4e549abe59ff 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -13,7 +13,7 @@
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <linux/balloon_compaction.h>
+#include <linux/balloon.h>
#include <linux/oom.h>
#include <linux/wait.h>
#include <linux/mm.h>
@@ -242,8 +242,8 @@ static void set_page_pfns(struct virtio_balloon *vb,
static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num)
{
unsigned int num_allocated_pages;
+ struct page *page, *next;
unsigned int num_pfns;
- struct page *page;
LIST_HEAD(pages);
/* We can only do one array worth at a time. */
@@ -262,21 +262,19 @@ static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num)
break;
}
- balloon_page_push(&pages, page);
+ list_add(&page->lru, &pages);
}
mutex_lock(&vb->balloon_lock);
vb->num_pfns = 0;
- while ((page = balloon_page_pop(&pages))) {
+ list_for_each_entry_safe(page, next, &pages, lru) {
+ list_del(&page->lru);
balloon_page_enqueue(&vb->vb_dev_info, page);
set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
- if (!virtio_has_feature(vb->vdev,
- VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
- adjust_managed_page_count(page, -1);
vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE;
}
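
With the header rename, fill_balloon() open-codes what balloon_page_push()/balloon_page_pop() used to wrap in the old <linux/balloon_compaction.h> (their removal from the new <linux/balloon.h> is inferred from this conversion). For comparison, the former helpers were roughly:

	static inline void balloon_page_push(struct list_head *pages, struct page *page)
	{
		list_add(&page->lru, pages);
	}

	static inline struct page *balloon_page_pop(struct list_head *pages)
	{
		struct page *page = list_first_entry_or_null(pages, struct page, lru);

		if (!page)
			return NULL;

		list_del(&page->lru);
		return page;
	}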
@@ -295,9 +293,6 @@ static void release_pages_balloon(struct virtio_balloon *vb,
struct page *page, *next;
list_for_each_entry_safe(page, next, pages, lru) {
- if (!virtio_has_feature(vb->vdev,
- VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
- adjust_managed_page_count(page, 1);
list_del(&page->lru);
put_page(page); /* balloon reference */
}
@@ -480,15 +475,19 @@ static inline s64 towards_target(struct virtio_balloon *vb)
static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
unsigned long num_to_return)
{
- struct page *page;
- unsigned long num_returned;
+ unsigned long num_returned = 0;
+ struct page *page, *next;
+
+ if (unlikely(!num_to_return))
+ return 0;
spin_lock_irq(&vb->free_page_list_lock);
- for (num_returned = 0; num_returned < num_to_return; num_returned++) {
- page = balloon_page_pop(&vb->free_page_list);
- if (!page)
- break;
+
+ list_for_each_entry_safe(page, next, &vb->free_page_list, lru) {
+ list_del(&page->lru);
__free_pages(page, VIRTIO_BALLOON_HINT_BLOCK_ORDER);
+ if (++num_returned == num_to_return)
+ break;
}
vb->num_free_page_blocks -= num_returned;
spin_unlock_irq(&vb->free_page_list_lock);
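
The new early return is load-bearing rather than a micro-optimization: the rewritten loop frees a block first and only then tests ++num_returned == num_to_return, so a target of zero would never match and the loop would drain the entire free_page_list. In outline:

	/* without the guard, num_to_return == 0 would free everything:
	 * ++num_returned yields 1, 2, 3, ... and never equals 0 */
	if (unlikely(!num_to_return))
		return 0;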
@@ -723,7 +722,7 @@ static int get_free_page_and_send(struct virtio_balloon *vb)
}
virtqueue_kick(vq);
spin_lock_irq(&vb->free_page_list_lock);
- balloon_page_push(&vb->free_page_list, page);
+ list_add(&page->lru, &vb->free_page_list);
vb->num_free_page_blocks++;
spin_unlock_irq(&vb->free_page_list_lock);
} else {
@@ -803,7 +802,7 @@ static void report_free_page_func(struct work_struct *work)
}
}
-#ifdef CONFIG_BALLOON_COMPACTION
+#ifdef CONFIG_BALLOON_MIGRATION
/*
* virtballoon_migratepage - perform the balloon page migration on behalf of
* a compaction thread. (called under page lock)
@@ -827,7 +826,6 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
{
struct virtio_balloon *vb = container_of(vb_dev_info,
struct virtio_balloon, vb_dev_info);
- unsigned long flags;
/*
* In order to avoid lock contention while migrating pages concurrently
@@ -840,25 +838,7 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
if (!mutex_trylock(&vb->balloon_lock))
return -EAGAIN;
- get_page(newpage); /* balloon reference */
-
- /*
- * When we migrate a page to a different zone and adjusted the
- * managed page count when inflating, we have to fixup the count of
- * both involved zones.
- */
- if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM) &&
- page_zone(page) != page_zone(newpage)) {
- adjust_managed_page_count(page, 1);
- adjust_managed_page_count(newpage, -1);
- }
-
/* balloon's page migration 1st step -- inflate "newpage" */
- spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
- balloon_page_insert(vb_dev_info, newpage);
- vb_dev_info->isolated_pages--;
- __count_vm_event(BALLOON_MIGRATE);
- spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
set_page_pfns(vb, vb->pfns, newpage);
tell_host(vb, vb->inflate_vq);
@@ -869,13 +849,9 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
tell_host(vb, vb->deflate_vq);
mutex_unlock(&vb->balloon_lock);
-
- balloon_page_finalize(page);
- put_page(page); /* balloon reference */
-
return 0;
}
-#endif /* CONFIG_BALLOON_COMPACTION */
+#endif /* CONFIG_BALLOON_MIGRATION */
static unsigned long shrink_free_pages(struct virtio_balloon *vb,
unsigned long pages_to_free)
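
Both drivers now drop the same boilerplate from their migratepage callbacks: the reference on @newpage, balloon_page_insert(), the isolated_pages accounting, the BALLOON_MIGRATE event, and the final balloon_page_finalize()/put_page() on @page. The common balloon core presumably performs this bookkeeping around the ->migratepage() call now; a hedged sketch of what that core-side flow might look like (an assumption about the new <linux/balloon.h>, not code from this patch):

	ret = b_dev_info->migratepage(b_dev_info, newpage, page, mode);
	if (ret == 0) {
		/* pages swapped: insert and count under pages_lock */
		get_page(newpage);
		balloon_page_insert(b_dev_info, newpage);
		__count_vm_event(BALLOON_MIGRATE);
	}
	if (ret == 0 || ret == -ENOENT) {
		/* the old page left the balloon in either case */
		b_dev_info->isolated_pages--;
		balloon_page_finalize(page);
		put_page(page);
	}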
@@ -970,7 +946,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
if (err)
goto out_free_vb;
-#ifdef CONFIG_BALLOON_COMPACTION
+ if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+ vb->vb_dev_info.adjust_managed_page_count = true;
+#ifdef CONFIG_BALLOON_MIGRATION
vb->vb_dev_info.migratepage = virtballoon_migratepage;
#endif
if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
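
The new vb_dev_info.adjust_managed_page_count flag set above replaces the adjust_managed_page_count() calls this patch deletes from fill_balloon(), release_pages_balloon() and the migration path; presumably the balloon core now performs the accounting itself when the flag is set, including the cross-zone fixup on migration that the old driver code handled by hand. A sketch of how the core might consume the flag (an assumption, not code from this patch):

	static void balloon_account_page(struct balloon_dev_info *b_dev_info,
					 struct page *page, int delta)
	{
		/* delta: -1 as a page enters the balloon, +1 as it leaves */
		if (b_dev_info->adjust_managed_page_count)
			adjust_managed_page_count(page, delta);
	}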