Diffstat (limited to 'drivers')
-rw-r--r--  drivers/block/zram/zram_drv.c   | 999
-rw-r--r--  drivers/block/zram/zram_drv.h   |  15
-rw-r--r--  drivers/misc/Kconfig            |   2
-rw-r--r--  drivers/misc/vmw_balloon.c      | 105
-rw-r--r--  drivers/virtio/Kconfig          |   2
-rw-r--r--  drivers/virtio/virtio_balloon.c |  64
6 files changed, 627 insertions, 560 deletions
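
Much of the zram_drv.c churn below replaces open-coded down_read()/up_read() (and the write variants) on the renamed dev_lock with the scope-based guard() helpers from <linux/cleanup.h>, which release the rwsem automatically on every return path and so eliminate the unlock-on-error labels. A minimal kernel-style sketch of the before/after pattern (struct foo and the foo_ready_show_*() functions are hypothetical names for illustration, not part of this patch):

#include <linux/cleanup.h>
#include <linux/rwsem.h>
#include <linux/sysfs.h>

struct foo {
	struct rw_semaphore dev_lock;	/* hypothetical device lock */
	bool ready;
};

/* Before: every early return needs a matching up_read(). */
static ssize_t foo_ready_show_old(struct foo *f, char *buf)
{
	ssize_t ret = -EINVAL;

	down_read(&f->dev_lock);
	if (f->ready)
		ret = sysfs_emit(buf, "1\n");
	up_read(&f->dev_lock);
	return ret;
}

/*
 * After: guard() pairs down_read() with an automatic up_read()
 * when the guard goes out of scope, on all return paths.
 */
static ssize_t foo_ready_show_new(struct foo *f, char *buf)
{
	guard(rwsem_read)(&f->dev_lock);
	if (!f->ready)
		return -EINVAL;
	return sysfs_emit(buf, "1\n");
}

The same conversion is what lets writeback_store() and recompress_store() drop the pp_in_progress atomic in the patch: taking dev_lock for writing now serializes post-processing by itself.
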
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 5759823d6314..61d3e2c74901 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -12,8 +12,7 @@ * */ -#define KMSG_COMPONENT "zram" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "zram: " fmt #include <linux/module.h> #include <linux/kernel.h> @@ -56,13 +55,10 @@ static size_t huge_class_size; static const struct block_device_operations zram_devops; -static void zram_free_page(struct zram *zram, size_t index); -static int zram_read_from_zspool(struct zram *zram, struct page *page, - u32 index); - +static void slot_free(struct zram *zram, u32 index); #define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map) -static void zram_slot_lock_init(struct zram *zram, u32 index) +static void slot_lock_init(struct zram *zram, u32 index) { static struct lock_class_key __key; @@ -82,9 +78,9 @@ static void zram_slot_lock_init(struct zram *zram, u32 index) * 4) Use TRY lock variant when in atomic context * - must check return value and handle locking failers */ -static __must_check bool zram_slot_trylock(struct zram *zram, u32 index) +static __must_check bool slot_trylock(struct zram *zram, u32 index) { - unsigned long *lock = &zram->table[index].flags; + unsigned long *lock = &zram->table[index].__lock; if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) { mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_); @@ -95,18 +91,18 @@ static __must_check bool zram_slot_trylock(struct zram *zram, u32 index) return false; } -static void zram_slot_lock(struct zram *zram, u32 index) +static void slot_lock(struct zram *zram, u32 index) { - unsigned long *lock = &zram->table[index].flags; + unsigned long *lock = &zram->table[index].__lock; mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_); wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE); lock_acquired(slot_dep_map(zram, index), _RET_IP_); } -static void zram_slot_unlock(struct zram *zram, u32 index) +static void slot_unlock(struct zram *zram, u32 index) { - unsigned long *lock = &zram->table[index].flags; + unsigned long *lock = &zram->table[index].__lock; mutex_release(slot_dep_map(zram, index), _RET_IP_); clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock); @@ -122,52 +118,80 @@ static inline struct zram *dev_to_zram(struct device *dev) return (struct zram *)dev_to_disk(dev)->private_data; } -static unsigned long zram_get_handle(struct zram *zram, u32 index) +static unsigned long get_slot_handle(struct zram *zram, u32 index) { return zram->table[index].handle; } -static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) +static void set_slot_handle(struct zram *zram, u32 index, unsigned long handle) { zram->table[index].handle = handle; } -static bool zram_test_flag(struct zram *zram, u32 index, - enum zram_pageflags flag) +static bool test_slot_flag(struct zram *zram, u32 index, + enum zram_pageflags flag) { - return zram->table[index].flags & BIT(flag); + return zram->table[index].attr.flags & BIT(flag); } -static void zram_set_flag(struct zram *zram, u32 index, - enum zram_pageflags flag) +static void set_slot_flag(struct zram *zram, u32 index, + enum zram_pageflags flag) { - zram->table[index].flags |= BIT(flag); + zram->table[index].attr.flags |= BIT(flag); } -static void zram_clear_flag(struct zram *zram, u32 index, - enum zram_pageflags flag) +static void clear_slot_flag(struct zram *zram, u32 index, + enum zram_pageflags flag) { - zram->table[index].flags &= ~BIT(flag); + 
zram->table[index].attr.flags &= ~BIT(flag); } -static size_t zram_get_obj_size(struct zram *zram, u32 index) +static size_t get_slot_size(struct zram *zram, u32 index) { - return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); + return zram->table[index].attr.flags & (BIT(ZRAM_FLAG_SHIFT) - 1); } -static void zram_set_obj_size(struct zram *zram, - u32 index, size_t size) +static void set_slot_size(struct zram *zram, u32 index, size_t size) { - unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; + unsigned long flags = zram->table[index].attr.flags >> ZRAM_FLAG_SHIFT; + + zram->table[index].attr.flags = (flags << ZRAM_FLAG_SHIFT) | size; +} - zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; +static inline bool slot_allocated(struct zram *zram, u32 index) +{ + return get_slot_size(zram, index) || + test_slot_flag(zram, index, ZRAM_SAME) || + test_slot_flag(zram, index, ZRAM_WB); +} + +static inline void set_slot_comp_priority(struct zram *zram, u32 index, + u32 prio) +{ + prio &= ZRAM_COMP_PRIORITY_MASK; + /* + * Clear previous priority value first, in case if we recompress + * further an already recompressed page + */ + zram->table[index].attr.flags &= ~(ZRAM_COMP_PRIORITY_MASK << + ZRAM_COMP_PRIORITY_BIT1); + zram->table[index].attr.flags |= (prio << ZRAM_COMP_PRIORITY_BIT1); } -static inline bool zram_allocated(struct zram *zram, u32 index) +static inline u32 get_slot_comp_priority(struct zram *zram, u32 index) { - return zram_get_obj_size(zram, index) || - zram_test_flag(zram, index, ZRAM_SAME) || - zram_test_flag(zram, index, ZRAM_WB); + u32 prio = zram->table[index].attr.flags >> ZRAM_COMP_PRIORITY_BIT1; + + return prio & ZRAM_COMP_PRIORITY_MASK; +} + +static void mark_slot_accessed(struct zram *zram, u32 index) +{ + clear_slot_flag(zram, index, ZRAM_IDLE); + clear_slot_flag(zram, index, ZRAM_PP_SLOT); +#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME + zram->table[index].attr.ac_time = (u32)ktime_get_boottime_seconds(); +#endif } static inline void update_used_max(struct zram *zram, const unsigned long pages) @@ -204,34 +228,6 @@ static inline bool is_partial_io(struct bio_vec *bvec) } #endif -static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio) -{ - prio &= ZRAM_COMP_PRIORITY_MASK; - /* - * Clear previous priority value first, in case if we recompress - * further an already recompressed page - */ - zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK << - ZRAM_COMP_PRIORITY_BIT1); - zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1); -} - -static inline u32 zram_get_priority(struct zram *zram, u32 index) -{ - u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1; - - return prio & ZRAM_COMP_PRIORITY_MASK; -} - -static void zram_accessed(struct zram *zram, u32 index) -{ - zram_clear_flag(zram, index, ZRAM_IDLE); - zram_clear_flag(zram, index, ZRAM_PP_SLOT); -#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME - zram->table[index].ac_time = ktime_get_boottime(); -#endif -} - #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP struct zram_pp_slot { unsigned long index; @@ -267,9 +263,9 @@ static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps) { list_del_init(&pps->entry); - zram_slot_lock(zram, pps->index); - zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT); - zram_slot_unlock(zram, pps->index); + slot_lock(zram, pps->index); + clear_slot_flag(zram, pps->index, ZRAM_PP_SLOT); + slot_unlock(zram, pps->index); kfree(pps); } @@ -308,10 +304,10 @@ static bool place_pp_slot(struct zram *zram, struct 
zram_pp_ctl *ctl, INIT_LIST_HEAD(&pps->entry); pps->index = index; - bid = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE; + bid = get_slot_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE; list_add(&pps->entry, &ctl->pp_buckets[bid]); - zram_set_flag(zram, pps->index, ZRAM_PP_SLOT); + set_slot_flag(zram, pps->index, ZRAM_PP_SLOT); return true; } @@ -363,15 +359,14 @@ static bool page_same_filled(void *ptr, unsigned long *element) return true; } -static ssize_t initstate_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t initstate_show(struct device *dev, struct device_attribute *attr, + char *buf) { u32 val; struct zram *zram = dev_to_zram(dev); - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); val = init_done(zram); - up_read(&zram->init_lock); return sysfs_emit(buf, "%u\n", val); } @@ -385,7 +380,8 @@ static ssize_t disksize_show(struct device *dev, } static ssize_t mem_limit_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) + struct device_attribute *attr, const char *buf, + size_t len) { u64 limit; char *tmp; @@ -395,15 +391,15 @@ static ssize_t mem_limit_store(struct device *dev, if (buf == tmp) /* no chars parsed, invalid input */ return -EINVAL; - down_write(&zram->init_lock); + guard(rwsem_write)(&zram->dev_lock); zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; - up_write(&zram->init_lock); return len; } static ssize_t mem_used_max_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) + struct device_attribute *attr, + const char *buf, size_t len) { int err; unsigned long val; @@ -413,12 +409,11 @@ static ssize_t mem_used_max_store(struct device *dev, if (err || val != 0) return -EINVAL; - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); if (init_done(zram)) { atomic_long_set(&zram->stats.max_used_pages, zs_get_total_pages(zram->mem_pool)); } - up_read(&zram->init_lock); return len; } @@ -441,67 +436,66 @@ static void mark_idle(struct zram *zram, ktime_t cutoff) * * And ZRAM_WB slots simply cannot be ZRAM_IDLE. */ - zram_slot_lock(zram, index); - if (!zram_allocated(zram, index) || - zram_test_flag(zram, index, ZRAM_WB) || - zram_test_flag(zram, index, ZRAM_SAME)) { - zram_slot_unlock(zram, index); + slot_lock(zram, index); + if (!slot_allocated(zram, index) || + test_slot_flag(zram, index, ZRAM_WB) || + test_slot_flag(zram, index, ZRAM_SAME)) { + slot_unlock(zram, index); continue; } #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME is_idle = !cutoff || - ktime_after(cutoff, zram->table[index].ac_time); + ktime_after(cutoff, zram->table[index].attr.ac_time); #endif if (is_idle) - zram_set_flag(zram, index, ZRAM_IDLE); + set_slot_flag(zram, index, ZRAM_IDLE); else - zram_clear_flag(zram, index, ZRAM_IDLE); - zram_slot_unlock(zram, index); + clear_slot_flag(zram, index, ZRAM_IDLE); + slot_unlock(zram, index); } } -static ssize_t idle_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) +static ssize_t idle_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); - ktime_t cutoff_time = 0; - ssize_t rv = -EINVAL; + ktime_t cutoff = 0; if (!sysfs_streq(buf, "all")) { /* * If it did not parse as 'all' try to treat it as an integer * when we have memory tracking enabled. 
*/ - u64 age_sec; + u32 age_sec; - if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && !kstrtoull(buf, 0, &age_sec)) - cutoff_time = ktime_sub(ktime_get_boottime(), - ns_to_ktime(age_sec * NSEC_PER_SEC)); + if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && + !kstrtouint(buf, 0, &age_sec)) + cutoff = ktime_sub((u32)ktime_get_boottime_seconds(), + age_sec); else - goto out; + return -EINVAL; } - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); if (!init_done(zram)) - goto out_unlock; + return -EINVAL; /* - * A cutoff_time of 0 marks everything as idle, this is the + * A cutoff of 0 marks everything as idle, this is the * "all" behavior. */ - mark_idle(zram, cutoff_time); - rv = len; - -out_unlock: - up_read(&zram->init_lock); -out: - return rv; + mark_idle(zram, cutoff); + return len; } #ifdef CONFIG_ZRAM_WRITEBACK #define INVALID_BDEV_BLOCK (~0UL) +static int read_from_zspool_raw(struct zram *zram, struct page *page, + u32 index); +static int read_from_zspool(struct zram *zram, struct page *page, u32 index); + struct zram_wb_ctl { /* idle list is accessed only by the writeback task, no concurency */ struct list_head idle_reqs; @@ -522,23 +516,86 @@ struct zram_wb_req { struct list_head entry; }; +struct zram_rb_req { + struct work_struct work; + struct zram *zram; + struct page *page; + /* The read bio for backing device */ + struct bio *bio; + unsigned long blk_idx; + union { + /* The original bio to complete (async read) */ + struct bio *parent; + /* error status (sync read) */ + int error; + }; + u32 index; +}; + +#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) +static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct zram *zram = dev_to_zram(dev); + ssize_t ret; + + guard(rwsem_read)(&zram->dev_lock); + ret = sysfs_emit(buf, + "%8llu %8llu %8llu\n", + FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), + FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), + FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); + + return ret; +} + +static ssize_t writeback_compressed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + bool val; + + if (kstrtobool(buf, &val)) + return -EINVAL; + + guard(rwsem_write)(&zram->dev_lock); + if (init_done(zram)) { + return -EBUSY; + } + + zram->wb_compressed = val; + + return len; +} + +static ssize_t writeback_compressed_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + bool val; + struct zram *zram = dev_to_zram(dev); + + guard(rwsem_read)(&zram->dev_lock); + val = zram->wb_compressed; + + return sysfs_emit(buf, "%d\n", val); +} + static ssize_t writeback_limit_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); u64 val; - ssize_t ret = -EINVAL; if (kstrtoull(buf, 10, &val)) - return ret; + return -EINVAL; - down_write(&zram->init_lock); + guard(rwsem_write)(&zram->dev_lock); zram->wb_limit_enable = val; - up_write(&zram->init_lock); - ret = len; - return ret; + return len; } static ssize_t writeback_limit_enable_show(struct device *dev, @@ -548,9 +605,8 @@ static ssize_t writeback_limit_enable_show(struct device *dev, bool val; struct zram *zram = dev_to_zram(dev); - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); val = zram->wb_limit_enable; - up_read(&zram->init_lock); return sysfs_emit(buf, "%d\n", val); } @@ -561,10 +617,9 @@ static ssize_t writeback_limit_store(struct device *dev, { 
struct zram *zram = dev_to_zram(dev); u64 val; - ssize_t ret = -EINVAL; if (kstrtoull(buf, 10, &val)) - return ret; + return -EINVAL; /* * When the page size is greater than 4KB, if bd_wb_limit is set to @@ -576,12 +631,10 @@ static ssize_t writeback_limit_store(struct device *dev, */ val = rounddown(val, PAGE_SIZE / 4096); - down_write(&zram->init_lock); + guard(rwsem_write)(&zram->dev_lock); zram->bd_wb_limit = val; - up_write(&zram->init_lock); - ret = len; - return ret; + return len; } static ssize_t writeback_limit_show(struct device *dev, @@ -590,9 +643,8 @@ static ssize_t writeback_limit_show(struct device *dev, u64 val; struct zram *zram = dev_to_zram(dev); - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); val = zram->bd_wb_limit; - up_read(&zram->init_lock); return sysfs_emit(buf, "%llu\n", val); } @@ -610,9 +662,8 @@ static ssize_t writeback_batch_size_store(struct device *dev, if (!val) return -EINVAL; - down_write(&zram->init_lock); + guard(rwsem_write)(&zram->dev_lock); zram->wb_batch_size = val; - up_write(&zram->init_lock); return len; } @@ -624,9 +675,8 @@ static ssize_t writeback_batch_size_show(struct device *dev, u32 val; struct zram *zram = dev_to_zram(dev); - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); val = zram->wb_batch_size; - up_read(&zram->init_lock); return sysfs_emit(buf, "%u\n", val); } @@ -646,37 +696,33 @@ static void reset_bdev(struct zram *zram) } static ssize_t backing_dev_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { struct file *file; struct zram *zram = dev_to_zram(dev); char *p; ssize_t ret; - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); file = zram->backing_dev; if (!file) { memcpy(buf, "none\n", 5); - up_read(&zram->init_lock); return 5; } p = file_path(file, buf, PAGE_SIZE - 1); - if (IS_ERR(p)) { - ret = PTR_ERR(p); - goto out; - } + if (IS_ERR(p)) + return PTR_ERR(p); ret = strlen(p); memmove(buf, p, ret); buf[ret++] = '\n'; -out: - up_read(&zram->init_lock); return ret; } static ssize_t backing_dev_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) + struct device_attribute *attr, const char *buf, + size_t len) { char *file_name; size_t sz; @@ -691,7 +737,7 @@ static ssize_t backing_dev_store(struct device *dev, if (!file_name) return -ENOMEM; - down_write(&zram->init_lock); + guard(rwsem_write)(&zram->dev_lock); if (init_done(zram)) { pr_info("Can't setup backing device for initialized device\n"); err = -EBUSY; @@ -739,7 +785,6 @@ static ssize_t backing_dev_store(struct device *dev, zram->backing_dev = backing_dev; zram->bitmap = bitmap; zram->nr_pages = nr_pages; - up_write(&zram->init_lock); pr_info("setup backing device %s\n", file_name); kfree(file_name); @@ -751,8 +796,6 @@ out: if (backing_dev) filp_close(backing_dev, NULL); - up_write(&zram->init_lock); - kfree(file_name); return err; @@ -780,18 +823,6 @@ static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) atomic64_dec(&zram->stats.bd_count); } -static void read_from_bdev_async(struct zram *zram, struct page *page, - unsigned long entry, struct bio *parent) -{ - struct bio *bio; - - bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); - bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); - __bio_add_page(bio, page, PAGE_SIZE, 0); - bio_chain(bio, parent); - submit_bio(bio); -} - static void release_wb_req(struct zram_wb_req *req) { __free_page(req->page); @@ -870,7 +901,7 @@ release_wb_ctl: 
static void zram_account_writeback_rollback(struct zram *zram) { - lockdep_assert_held_read(&zram->init_lock); + lockdep_assert_held_write(&zram->dev_lock); if (zram->wb_limit_enable) zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12); @@ -878,7 +909,7 @@ static void zram_account_writeback_rollback(struct zram *zram) static void zram_account_writeback_submit(struct zram *zram) { - lockdep_assert_held_read(&zram->init_lock); + lockdep_assert_held_write(&zram->dev_lock); if (zram->wb_limit_enable && zram->bd_wb_limit > 0) zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); @@ -886,8 +917,9 @@ static void zram_account_writeback_submit(struct zram *zram) static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) { - u32 index = req->pps->index; - int err; + u32 size, index = req->pps->index; + int err, prio; + bool huge; err = blk_status_to_errno(req->bio.bi_status); if (err) { @@ -901,7 +933,7 @@ static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) } atomic64_inc(&zram->stats.bd_writes); - zram_slot_lock(zram, index); + slot_lock(zram, index); /* * We release slot lock during writeback so slot can change under us: * slot_free() or slot_free() and zram_write_page(). In both cases @@ -909,18 +941,36 @@ static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) * set ZRAM_PP_SLOT on such slots until current post-processing * finishes. */ - if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) { + if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) { zram_release_bdev_block(zram, req->blk_idx); goto out; } - zram_free_page(zram, index); - zram_set_flag(zram, index, ZRAM_WB); - zram_set_handle(zram, index, req->blk_idx); + if (zram->wb_compressed) { + /* + * ZRAM_WB slots get freed, we need to preserve data required + * for read decompression. + */ + size = get_slot_size(zram, index); + prio = get_slot_comp_priority(zram, index); + huge = test_slot_flag(zram, index, ZRAM_HUGE); + } + + slot_free(zram, index); + set_slot_flag(zram, index, ZRAM_WB); + set_slot_handle(zram, index, req->blk_idx); + + if (zram->wb_compressed) { + if (huge) + set_slot_flag(zram, index, ZRAM_HUGE); + set_slot_size(zram, index, size); + set_slot_comp_priority(zram, index, prio); + } + atomic64_inc(&zram->stats.pages_stored); out: - zram_slot_unlock(zram, index); + slot_unlock(zram, index); return 0; } @@ -1041,18 +1091,22 @@ static int zram_writeback_slots(struct zram *zram, } index = pps->index; - zram_slot_lock(zram, index); + slot_lock(zram, index); /* * scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so * slots can change in the meantime. If slots are accessed or * freed they lose ZRAM_PP_SLOT flag and hence we don't * post-process them. 
*/ - if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) + if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) goto next; - if (zram_read_from_zspool(zram, req->page, index)) + if (zram->wb_compressed) + err = read_from_zspool_raw(zram, req->page, index); + else + err = read_from_zspool(zram, req->page, index); + if (err) goto next; - zram_slot_unlock(zram, index); + slot_unlock(zram, index); /* * From now on pp-slot is owned by the req, remove it from @@ -1074,7 +1128,7 @@ static int zram_writeback_slots(struct zram *zram, continue; next: - zram_slot_unlock(zram, index); + slot_unlock(zram, index); release_pp_slot(zram, pps); } @@ -1167,27 +1221,27 @@ static int scan_slots_for_writeback(struct zram *zram, u32 mode, while (index < hi) { bool ok = true; - zram_slot_lock(zram, index); - if (!zram_allocated(zram, index)) + slot_lock(zram, index); + if (!slot_allocated(zram, index)) goto next; - if (zram_test_flag(zram, index, ZRAM_WB) || - zram_test_flag(zram, index, ZRAM_SAME)) + if (test_slot_flag(zram, index, ZRAM_WB) || + test_slot_flag(zram, index, ZRAM_SAME)) goto next; if (mode & IDLE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_IDLE)) + !test_slot_flag(zram, index, ZRAM_IDLE)) goto next; if (mode & HUGE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_HUGE)) + !test_slot_flag(zram, index, ZRAM_HUGE)) goto next; if (mode & INCOMPRESSIBLE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) + !test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE)) goto next; ok = place_pp_slot(zram, ctl, index); next: - zram_slot_unlock(zram, index); + slot_unlock(zram, index); if (!ok) break; index++; @@ -1209,33 +1263,21 @@ static ssize_t writeback_store(struct device *dev, ssize_t ret = len; int err, mode = 0; - down_read(&zram->init_lock); - if (!init_done(zram)) { - up_read(&zram->init_lock); + guard(rwsem_write)(&zram->dev_lock); + if (!init_done(zram)) return -EINVAL; - } - - /* Do not permit concurrent post-processing actions. 
*/ - if (atomic_xchg(&zram->pp_in_progress, 1)) { - up_read(&zram->init_lock); - return -EAGAIN; - } - if (!zram->backing_dev) { - ret = -ENODEV; - goto release_init_lock; - } + if (!zram->backing_dev) + return -ENODEV; pp_ctl = init_pp_ctl(); - if (!pp_ctl) { - ret = -ENOMEM; - goto release_init_lock; - } + if (!pp_ctl) + return -ENOMEM; wb_ctl = init_wb_ctl(zram); if (!wb_ctl) { ret = -ENOMEM; - goto release_init_lock; + goto out; } args = skip_spaces(buf); @@ -1259,7 +1301,7 @@ static ssize_t writeback_store(struct device *dev, err = parse_mode(param, &mode); if (err) { ret = err; - goto release_init_lock; + goto out; } scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); @@ -1270,7 +1312,7 @@ static ssize_t writeback_store(struct device *dev, err = parse_mode(val, &mode); if (err) { ret = err; - goto release_init_lock; + goto out; } scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); @@ -1281,7 +1323,7 @@ static ssize_t writeback_store(struct device *dev, err = parse_page_index(val, nr_pages, &lo, &hi); if (err) { ret = err; - goto release_init_lock; + goto out; } scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); @@ -1292,7 +1334,7 @@ static ssize_t writeback_store(struct device *dev, err = parse_page_indexes(val, nr_pages, &lo, &hi); if (err) { ret = err; - goto release_init_lock; + goto out; } scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); @@ -1304,33 +1346,147 @@ static ssize_t writeback_store(struct device *dev, if (err) ret = err; -release_init_lock: +out: release_pp_ctl(zram, pp_ctl); release_wb_ctl(wb_ctl); - atomic_set(&zram->pp_in_progress, 0); - up_read(&zram->init_lock); return ret; } -struct zram_work { - struct work_struct work; - struct zram *zram; - unsigned long entry; - struct page *page; - int error; -}; +static int decompress_bdev_page(struct zram *zram, struct page *page, u32 index) +{ + struct zcomp_strm *zstrm; + unsigned int size; + int ret, prio; + void *src; + + slot_lock(zram, index); + /* Since slot was unlocked we need to make sure it's still ZRAM_WB */ + if (!test_slot_flag(zram, index, ZRAM_WB)) { + slot_unlock(zram, index); + /* We read some stale data, zero it out */ + memset_page(page, 0, 0, PAGE_SIZE); + return -EIO; + } + + if (test_slot_flag(zram, index, ZRAM_HUGE)) { + slot_unlock(zram, index); + return 0; + } + + size = get_slot_size(zram, index); + prio = get_slot_comp_priority(zram, index); + + zstrm = zcomp_stream_get(zram->comps[prio]); + src = kmap_local_page(page); + ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, + zstrm->local_copy); + if (!ret) + copy_page(src, zstrm->local_copy); + kunmap_local(src); + zcomp_stream_put(zstrm); + slot_unlock(zram, index); + + return ret; +} + +static void zram_deferred_decompress(struct work_struct *w) +{ + struct zram_rb_req *req = container_of(w, struct zram_rb_req, work); + struct page *page = bio_first_page_all(req->bio); + struct zram *zram = req->zram; + u32 index = req->index; + int ret; + + ret = decompress_bdev_page(zram, page, index); + if (ret) + req->parent->bi_status = BLK_STS_IOERR; + + /* Decrement parent's ->remaining */ + bio_endio(req->parent); + bio_put(req->bio); + kfree(req); +} -static void zram_sync_read(struct work_struct *work) +static void zram_async_read_endio(struct bio *bio) { - struct zram_work *zw = container_of(work, struct zram_work, work); + struct zram_rb_req *req = bio->bi_private; + struct zram *zram = req->zram; + + if (bio->bi_status) { + req->parent->bi_status = bio->bi_status; + bio_endio(req->parent); + bio_put(bio); + kfree(req); + 
return; + } + + /* + * NOTE: zram_async_read_endio() is not exactly right place for this. + * Ideally, we need to do it after ZRAM_WB check, but this requires + * us to use wq path even on systems that don't enable compressed + * writeback, because we cannot take slot-lock in the current context. + * + * Keep the existing behavior for now. + */ + if (zram->wb_compressed == false) { + /* No decompression needed, complete the parent IO */ + bio_endio(req->parent); + bio_put(bio); + kfree(req); + return; + } + + /* + * zram decompression is sleepable, so we need to deffer it to + * a preemptible context. + */ + INIT_WORK(&req->work, zram_deferred_decompress); + queue_work(system_highpri_wq, &req->work); +} + +static void read_from_bdev_async(struct zram *zram, struct page *page, + u32 index, unsigned long blk_idx, + struct bio *parent) +{ + struct zram_rb_req *req; + struct bio *bio; + + req = kmalloc(sizeof(*req), GFP_NOIO); + if (!req) + return; + + bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); + if (!bio) { + kfree(req); + return; + } + + req->zram = zram; + req->index = index; + req->blk_idx = blk_idx; + req->bio = bio; + req->parent = parent; + + bio->bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); + bio->bi_private = req; + bio->bi_end_io = zram_async_read_endio; + + __bio_add_page(bio, page, PAGE_SIZE, 0); + bio_inc_remaining(parent); + submit_bio(bio); +} + +static void zram_sync_read(struct work_struct *w) +{ + struct zram_rb_req *req = container_of(w, struct zram_rb_req, work); struct bio_vec bv; struct bio bio; - bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ); - bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9); - __bio_add_page(&bio, zw->page, PAGE_SIZE, 0); - zw->error = submit_bio_wait(&bio); + bio_init(&bio, req->zram->bdev, &bv, 1, REQ_OP_READ); + bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); + __bio_add_page(&bio, req->page, PAGE_SIZE, 0); + req->error = submit_bio_wait(&bio); } /* @@ -1338,39 +1494,42 @@ static void zram_sync_read(struct work_struct *work) * chained IO with parent IO in same context, it's a deadlock. To avoid that, * use a worker thread context. 
*/ -static int read_from_bdev_sync(struct zram *zram, struct page *page, - unsigned long entry) +static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index, + unsigned long blk_idx) { - struct zram_work work; + struct zram_rb_req req; + + req.page = page; + req.zram = zram; + req.blk_idx = blk_idx; - work.page = page; - work.zram = zram; - work.entry = entry; + INIT_WORK_ONSTACK(&req.work, zram_sync_read); + queue_work(system_dfl_wq, &req.work); + flush_work(&req.work); + destroy_work_on_stack(&req.work); - INIT_WORK_ONSTACK(&work.work, zram_sync_read); - queue_work(system_dfl_wq, &work.work); - flush_work(&work.work); - destroy_work_on_stack(&work.work); + if (req.error || zram->wb_compressed == false) + return req.error; - return work.error; + return decompress_bdev_page(zram, page, index); } -static int read_from_bdev(struct zram *zram, struct page *page, - unsigned long entry, struct bio *parent) +static int read_from_bdev(struct zram *zram, struct page *page, u32 index, + unsigned long blk_idx, struct bio *parent) { atomic64_inc(&zram->stats.bd_reads); if (!parent) { if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO))) return -EIO; - return read_from_bdev_sync(zram, page, entry); + return read_from_bdev_sync(zram, page, index, blk_idx); } - read_from_bdev_async(zram, page, entry, parent); + read_from_bdev_async(zram, page, index, blk_idx, parent); return 0; } #else static inline void reset_bdev(struct zram *zram) {}; -static int read_from_bdev(struct zram *zram, struct page *page, - unsigned long entry, struct bio *parent) +static int read_from_bdev(struct zram *zram, struct page *page, u32 index, + unsigned long blk_idx, struct bio *parent) { return -EIO; } @@ -1401,15 +1560,13 @@ static ssize_t read_block_state(struct file *file, char __user *buf, ssize_t index, written = 0; struct zram *zram = file->private_data; unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; - struct timespec64 ts; kbuf = kvmalloc(count, GFP_KERNEL); if (!kbuf) return -ENOMEM; - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); if (!init_done(zram)) { - up_read(&zram->init_lock); kvfree(kbuf); return -EINVAL; } @@ -1417,35 +1574,32 @@ static ssize_t read_block_state(struct file *file, char __user *buf, for (index = *ppos; index < nr_pages; index++) { int copied; - zram_slot_lock(zram, index); - if (!zram_allocated(zram, index)) + slot_lock(zram, index); + if (!slot_allocated(zram, index)) goto next; - ts = ktime_to_timespec64(zram->table[index].ac_time); copied = snprintf(kbuf + written, count, - "%12zd %12lld.%06lu %c%c%c%c%c%c\n", - index, (s64)ts.tv_sec, - ts.tv_nsec / NSEC_PER_USEC, - zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', - zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', - zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', - zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', - zram_get_priority(zram, index) ? 'r' : '.', - zram_test_flag(zram, index, + "%12zd %12u.%06d %c%c%c%c%c%c\n", + index, zram->table[index].attr.ac_time, 0, + test_slot_flag(zram, index, ZRAM_SAME) ? 's' : '.', + test_slot_flag(zram, index, ZRAM_WB) ? 'w' : '.', + test_slot_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', + test_slot_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', + get_slot_comp_priority(zram, index) ? 'r' : '.', + test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE) ? 
'n' : '.'); if (count <= copied) { - zram_slot_unlock(zram, index); + slot_unlock(zram, index); break; } written += copied; count -= copied; next: - zram_slot_unlock(zram, index); + slot_unlock(zram, index); *ppos += 1; } - up_read(&zram->init_lock); if (copy_to_user(buf, kbuf, written)) written = -EFAULT; kvfree(kbuf); @@ -1512,16 +1666,14 @@ static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) return -EINVAL; } - down_write(&zram->init_lock); + guard(rwsem_write)(&zram->dev_lock); if (init_done(zram)) { - up_write(&zram->init_lock); kfree(compressor); pr_info("Can't change algorithm for initialized device\n"); return -EBUSY; } comp_algorithm_set(zram, prio, compressor); - up_write(&zram->init_lock); return 0; } @@ -1642,9 +1794,8 @@ static ssize_t comp_algorithm_show(struct device *dev, struct zram *zram = dev_to_zram(dev); ssize_t sz; - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0); - up_read(&zram->init_lock); return sz; } @@ -1669,7 +1820,7 @@ static ssize_t recomp_algorithm_show(struct device *dev, ssize_t sz = 0; u32 prio; - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { if (!zram->comp_algs[prio]) continue; @@ -1677,7 +1828,6 @@ static ssize_t recomp_algorithm_show(struct device *dev, sz += sysfs_emit_at(buf, sz, "#%d: ", prio); sz += zcomp_available_show(zram->comp_algs[prio], buf, sz); } - up_read(&zram->init_lock); return sz; } @@ -1723,42 +1873,38 @@ static ssize_t recomp_algorithm_store(struct device *dev, } #endif -static ssize_t compact_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) +static ssize_t compact_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); - down_read(&zram->init_lock); - if (!init_done(zram)) { - up_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); + if (!init_done(zram)) return -EINVAL; - } zs_compact(zram->mem_pool); - up_read(&zram->init_lock); return len; } -static ssize_t io_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t io_stat_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct zram *zram = dev_to_zram(dev); ssize_t ret; - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); ret = sysfs_emit(buf, "%8llu %8llu 0 %8llu\n", (u64)atomic64_read(&zram->stats.failed_reads), (u64)atomic64_read(&zram->stats.failed_writes), (u64)atomic64_read(&zram->stats.notify_free)); - up_read(&zram->init_lock); return ret; } -static ssize_t mm_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t mm_stat_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct zram *zram = dev_to_zram(dev); struct zs_pool_stats pool_stats; @@ -1768,7 +1914,7 @@ static ssize_t mm_stat_show(struct device *dev, memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); if (init_done(zram)) { mem_used = zs_get_total_pages(zram->mem_pool); zs_pool_stats(zram->mem_pool, &pool_stats); @@ -1788,55 +1934,26 @@ static ssize_t mm_stat_show(struct device *dev, atomic_long_read(&pool_stats.pages_compacted), (u64)atomic64_read(&zram->stats.huge_pages), (u64)atomic64_read(&zram->stats.huge_pages_since)); - up_read(&zram->init_lock); return ret; } -#ifdef 
CONFIG_ZRAM_WRITEBACK -#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) -static ssize_t bd_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - ssize_t ret; - - down_read(&zram->init_lock); - ret = sysfs_emit(buf, - "%8llu %8llu %8llu\n", - FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), - FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), - FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); - up_read(&zram->init_lock); - - return ret; -} -#endif - static ssize_t debug_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { int version = 1; struct zram *zram = dev_to_zram(dev); ssize_t ret; - down_read(&zram->init_lock); + guard(rwsem_read)(&zram->dev_lock); ret = sysfs_emit(buf, "version: %d\n0 %8llu\n", version, (u64)atomic64_read(&zram->stats.miss_free)); - up_read(&zram->init_lock); return ret; } -static DEVICE_ATTR_RO(io_stat); -static DEVICE_ATTR_RO(mm_stat); -#ifdef CONFIG_ZRAM_WRITEBACK -static DEVICE_ATTR_RO(bd_stat); -#endif -static DEVICE_ATTR_RO(debug_stat); - static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; @@ -1847,7 +1964,7 @@ static void zram_meta_free(struct zram *zram, u64 disksize) /* Free all pages that are still in this zram device */ for (index = 0; index < num_pages; index++) - zram_free_page(zram, index); + slot_free(zram, index); zs_destroy_pool(zram->mem_pool); vfree(zram->table); @@ -1874,32 +1991,32 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) huge_class_size = zs_huge_class_size(zram->mem_pool); for (index = 0; index < num_pages; index++) - zram_slot_lock_init(zram, index); + slot_lock_init(zram, index); return true; } -static void zram_free_page(struct zram *zram, size_t index) +static void slot_free(struct zram *zram, u32 index) { unsigned long handle; #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME - zram->table[index].ac_time = 0; + zram->table[index].attr.ac_time = 0; #endif - zram_clear_flag(zram, index, ZRAM_IDLE); - zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE); - zram_clear_flag(zram, index, ZRAM_PP_SLOT); - zram_set_priority(zram, index, 0); + clear_slot_flag(zram, index, ZRAM_IDLE); + clear_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE); + clear_slot_flag(zram, index, ZRAM_PP_SLOT); + set_slot_comp_priority(zram, index, 0); - if (zram_test_flag(zram, index, ZRAM_HUGE)) { - zram_clear_flag(zram, index, ZRAM_HUGE); + if (test_slot_flag(zram, index, ZRAM_HUGE)) { + clear_slot_flag(zram, index, ZRAM_HUGE); atomic64_dec(&zram->stats.huge_pages); } - if (zram_test_flag(zram, index, ZRAM_WB)) { - zram_clear_flag(zram, index, ZRAM_WB); - zram_release_bdev_block(zram, zram_get_handle(zram, index)); + if (test_slot_flag(zram, index, ZRAM_WB)) { + clear_slot_flag(zram, index, ZRAM_WB); + zram_release_bdev_block(zram, get_slot_handle(zram, index)); goto out; } @@ -1907,24 +2024,24 @@ static void zram_free_page(struct zram *zram, size_t index) * No memory is allocated for same element filled pages. * Simply clear same page flag. 
*/ - if (zram_test_flag(zram, index, ZRAM_SAME)) { - zram_clear_flag(zram, index, ZRAM_SAME); + if (test_slot_flag(zram, index, ZRAM_SAME)) { + clear_slot_flag(zram, index, ZRAM_SAME); atomic64_dec(&zram->stats.same_pages); goto out; } - handle = zram_get_handle(zram, index); + handle = get_slot_handle(zram, index); if (!handle) return; zs_free(zram->mem_pool, handle); - atomic64_sub(zram_get_obj_size(zram, index), + atomic64_sub(get_slot_size(zram, index), &zram->stats.compr_data_size); out: atomic64_dec(&zram->stats.pages_stored); - zram_set_handle(zram, index, 0); - zram_set_obj_size(zram, index, 0); + set_slot_handle(zram, index, 0); + set_slot_size(zram, index, 0); } static int read_same_filled_page(struct zram *zram, struct page *page, @@ -1933,7 +2050,7 @@ static int read_same_filled_page(struct zram *zram, struct page *page, void *mem; mem = kmap_local_page(page); - zram_fill_page(mem, PAGE_SIZE, zram_get_handle(zram, index)); + zram_fill_page(mem, PAGE_SIZE, get_slot_handle(zram, index)); kunmap_local(mem); return 0; } @@ -1944,12 +2061,12 @@ static int read_incompressible_page(struct zram *zram, struct page *page, unsigned long handle; void *src, *dst; - handle = zram_get_handle(zram, index); - src = zs_obj_read_begin(zram->mem_pool, handle, NULL); + handle = get_slot_handle(zram, index); + src = zs_obj_read_begin(zram->mem_pool, handle, PAGE_SIZE, NULL); dst = kmap_local_page(page); copy_page(dst, src); kunmap_local(dst); - zs_obj_read_end(zram->mem_pool, handle, src); + zs_obj_read_end(zram->mem_pool, handle, PAGE_SIZE, src); return 0; } @@ -1962,33 +2079,60 @@ static int read_compressed_page(struct zram *zram, struct page *page, u32 index) void *src, *dst; int ret, prio; - handle = zram_get_handle(zram, index); - size = zram_get_obj_size(zram, index); - prio = zram_get_priority(zram, index); + handle = get_slot_handle(zram, index); + size = get_slot_size(zram, index); + prio = get_slot_comp_priority(zram, index); zstrm = zcomp_stream_get(zram->comps[prio]); - src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy); + src = zs_obj_read_begin(zram->mem_pool, handle, size, + zstrm->local_copy); dst = kmap_local_page(page); ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst); kunmap_local(dst); - zs_obj_read_end(zram->mem_pool, handle, src); + zs_obj_read_end(zram->mem_pool, handle, size, src); zcomp_stream_put(zstrm); return ret; } +#if defined CONFIG_ZRAM_WRITEBACK +static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index) +{ + struct zcomp_strm *zstrm; + unsigned long handle; + unsigned int size; + void *src; + + handle = get_slot_handle(zram, index); + size = get_slot_size(zram, index); + + /* + * We need to get stream just for ->local_copy buffer, in + * case if object spans two physical pages. No decompression + * takes place here, as we read raw compressed data. + */ + zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); + src = zs_obj_read_begin(zram->mem_pool, handle, size, + zstrm->local_copy); + memcpy_to_page(page, 0, src, size); + zs_obj_read_end(zram->mem_pool, handle, size, src); + zcomp_stream_put(zstrm); + + return 0; +} +#endif + /* * Reads (decompresses if needed) a page from zspool (zsmalloc). * Corresponding ZRAM slot should be locked. 
*/ -static int zram_read_from_zspool(struct zram *zram, struct page *page, - u32 index) +static int read_from_zspool(struct zram *zram, struct page *page, u32 index) { - if (zram_test_flag(zram, index, ZRAM_SAME) || - !zram_get_handle(zram, index)) + if (test_slot_flag(zram, index, ZRAM_SAME) || + !get_slot_handle(zram, index)) return read_same_filled_page(zram, page, index); - if (!zram_test_flag(zram, index, ZRAM_HUGE)) + if (!test_slot_flag(zram, index, ZRAM_HUGE)) return read_compressed_page(zram, page, index); else return read_incompressible_page(zram, page, index); @@ -1999,20 +2143,20 @@ static int zram_read_page(struct zram *zram, struct page *page, u32 index, { int ret; - zram_slot_lock(zram, index); - if (!zram_test_flag(zram, index, ZRAM_WB)) { + slot_lock(zram, index); + if (!test_slot_flag(zram, index, ZRAM_WB)) { /* Slot should be locked through out the function call */ - ret = zram_read_from_zspool(zram, page, index); - zram_slot_unlock(zram, index); + ret = read_from_zspool(zram, page, index); + slot_unlock(zram, index); } else { - unsigned long blk_idx = zram_get_handle(zram, index); + unsigned long blk_idx = get_slot_handle(zram, index); /* * The slot should be unlocked before reading from the backing * device. */ - zram_slot_unlock(zram, index); - ret = read_from_bdev(zram, page, blk_idx, parent); + slot_unlock(zram, index); + ret = read_from_bdev(zram, page, index, blk_idx, parent); } /* Should NEVER happen. Return bio error if it does. */ @@ -2052,11 +2196,11 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, static int write_same_filled_page(struct zram *zram, unsigned long fill, u32 index) { - zram_slot_lock(zram, index); - zram_free_page(zram, index); - zram_set_flag(zram, index, ZRAM_SAME); - zram_set_handle(zram, index, fill); - zram_slot_unlock(zram, index); + slot_lock(zram, index); + slot_free(zram, index); + set_slot_flag(zram, index, ZRAM_SAME); + set_slot_handle(zram, index, fill); + slot_unlock(zram, index); atomic64_inc(&zram->stats.same_pages); atomic64_inc(&zram->stats.pages_stored); @@ -2090,12 +2234,12 @@ static int write_incompressible_page(struct zram *zram, struct page *page, zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE); kunmap_local(src); - zram_slot_lock(zram, index); - zram_free_page(zram, index); - zram_set_flag(zram, index, ZRAM_HUGE); - zram_set_handle(zram, index, handle); - zram_set_obj_size(zram, index, PAGE_SIZE); - zram_slot_unlock(zram, index); + slot_lock(zram, index); + slot_free(zram, index); + set_slot_flag(zram, index, ZRAM_HUGE); + set_slot_handle(zram, index, handle); + set_slot_size(zram, index, PAGE_SIZE); + slot_unlock(zram, index); atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size); atomic64_inc(&zram->stats.huge_pages); @@ -2155,11 +2299,11 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len); zcomp_stream_put(zstrm); - zram_slot_lock(zram, index); - zram_free_page(zram, index); - zram_set_handle(zram, index, handle); - zram_set_obj_size(zram, index, comp_len); - zram_slot_unlock(zram, index); + slot_lock(zram, index); + slot_free(zram, index); + set_slot_handle(zram, index, handle); + set_slot_size(zram, index, comp_len); + slot_unlock(zram, index); /* Update stats */ atomic64_inc(&zram->stats.pages_stored); @@ -2210,30 +2354,30 @@ static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max, for (index = 0; index < nr_pages; index++) { bool ok = true; - zram_slot_lock(zram, index); - if 
(!zram_allocated(zram, index)) + slot_lock(zram, index); + if (!slot_allocated(zram, index)) goto next; if (mode & RECOMPRESS_IDLE && - !zram_test_flag(zram, index, ZRAM_IDLE)) + !test_slot_flag(zram, index, ZRAM_IDLE)) goto next; if (mode & RECOMPRESS_HUGE && - !zram_test_flag(zram, index, ZRAM_HUGE)) + !test_slot_flag(zram, index, ZRAM_HUGE)) goto next; - if (zram_test_flag(zram, index, ZRAM_WB) || - zram_test_flag(zram, index, ZRAM_SAME) || - zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) + if (test_slot_flag(zram, index, ZRAM_WB) || + test_slot_flag(zram, index, ZRAM_SAME) || + test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE)) goto next; /* Already compressed with same of higher priority */ - if (zram_get_priority(zram, index) + 1 >= prio_max) + if (get_slot_comp_priority(zram, index) + 1 >= prio_max) goto next; ok = place_pp_slot(zram, ctl, index); next: - zram_slot_unlock(zram, index); + slot_unlock(zram, index); if (!ok) break; } @@ -2262,18 +2406,18 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, void *src; int ret = 0; - handle_old = zram_get_handle(zram, index); + handle_old = get_slot_handle(zram, index); if (!handle_old) return -EINVAL; - comp_len_old = zram_get_obj_size(zram, index); + comp_len_old = get_slot_size(zram, index); /* * Do not recompress objects that are already "small enough". */ if (comp_len_old < threshold) return 0; - ret = zram_read_from_zspool(zram, page, index); + ret = read_from_zspool(zram, page, index); if (ret) return ret; @@ -2282,11 +2426,11 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, * we don't preserve IDLE flag and don't incorrectly pick this entry * for different post-processing type (e.g. writeback). */ - zram_clear_flag(zram, index, ZRAM_IDLE); + clear_slot_flag(zram, index, ZRAM_IDLE); class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old); - prio = max(prio, zram_get_priority(zram, index) + 1); + prio = max(prio, get_slot_comp_priority(zram, index) + 1); /* * Recompression slots scan should not select slots that are * already compressed with a higher priority algorithm, but @@ -2353,7 +2497,7 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, */ if (prio < zram->num_active_comps) return 0; - zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); + set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE); return 0; } @@ -2362,14 +2506,15 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, * avoid direct reclaim. Allocation error is not fatal since * we still have the old object in the mem_pool. * - * XXX: technically, the node we really want here is the node that holds - * the original compressed data. But that would require us to modify - * zsmalloc API to return this information. For now, we will make do with - * the node of the page allocated for recompression. + * XXX: technically, the node we really want here is the node that + * holds the original compressed data. But that would require us to + * modify zsmalloc API to return this information. For now, we will + * make do with the node of the page allocated for recompression. 
*/ handle_new = zs_malloc(zram->mem_pool, comp_len_new, GFP_NOIO | __GFP_NOWARN | - __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); + __GFP_HIGHMEM | __GFP_MOVABLE, + page_to_nid(page)); if (IS_ERR_VALUE(handle_new)) { zcomp_stream_put(zstrm); return PTR_ERR((void *)handle_new); @@ -2378,10 +2523,10 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new); zcomp_stream_put(zstrm); - zram_free_page(zram, index); - zram_set_handle(zram, index, handle_new); - zram_set_obj_size(zram, index, comp_len_new); - zram_set_priority(zram, index, prio); + slot_free(zram, index); + set_slot_handle(zram, index, handle_new); + set_slot_size(zram, index, comp_len_new); + set_slot_comp_priority(zram, index, prio); atomic64_add(comp_len_new, &zram->stats.compr_data_size); atomic64_inc(&zram->stats.pages_stored); @@ -2466,17 +2611,9 @@ static ssize_t recompress_store(struct device *dev, if (threshold >= huge_class_size) return -EINVAL; - down_read(&zram->init_lock); - if (!init_done(zram)) { - ret = -EINVAL; - goto release_init_lock; - } - - /* Do not permit concurrent post-processing actions. */ - if (atomic_xchg(&zram->pp_in_progress, 1)) { - up_read(&zram->init_lock); - return -EAGAIN; - } + guard(rwsem_write)(&zram->dev_lock); + if (!init_done(zram)) + return -EINVAL; if (algo) { bool found = false; @@ -2494,26 +2631,26 @@ static ssize_t recompress_store(struct device *dev, if (!found) { ret = -EINVAL; - goto release_init_lock; + goto out; } } prio_max = min(prio_max, (u32)zram->num_active_comps); if (prio >= prio_max) { ret = -EINVAL; - goto release_init_lock; + goto out; } page = alloc_page(GFP_KERNEL); if (!page) { ret = -ENOMEM; - goto release_init_lock; + goto out; } ctl = init_pp_ctl(); if (!ctl) { ret = -ENOMEM; - goto release_init_lock; + goto out; } scan_slots_for_recompress(zram, mode, prio_max, ctl); @@ -2525,15 +2662,15 @@ static ssize_t recompress_store(struct device *dev, if (!num_recomp_pages) break; - zram_slot_lock(zram, pps->index); - if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT)) + slot_lock(zram, pps->index); + if (!test_slot_flag(zram, pps->index, ZRAM_PP_SLOT)) goto next; err = recompress_slot(zram, pps->index, page, &num_recomp_pages, threshold, prio, prio_max); next: - zram_slot_unlock(zram, pps->index); + slot_unlock(zram, pps->index); release_pp_slot(zram, pps); if (err) { @@ -2544,12 +2681,10 @@ next: cond_resched(); } -release_init_lock: +out: if (page) __free_page(page); release_pp_ctl(zram, ctl); - atomic_set(&zram->pp_in_progress, 0); - up_read(&zram->init_lock); return ret; } #endif @@ -2580,9 +2715,9 @@ static void zram_bio_discard(struct zram *zram, struct bio *bio) } while (n >= PAGE_SIZE) { - zram_slot_lock(zram, index); - zram_free_page(zram, index); - zram_slot_unlock(zram, index); + slot_lock(zram, index); + slot_free(zram, index); + slot_unlock(zram, index); atomic64_inc(&zram->stats.notify_free); index++; n -= PAGE_SIZE; @@ -2611,9 +2746,9 @@ static void zram_bio_read(struct zram *zram, struct bio *bio) } flush_dcache_page(bv.bv_page); - zram_slot_lock(zram, index); - zram_accessed(zram, index); - zram_slot_unlock(zram, index); + slot_lock(zram, index); + mark_slot_accessed(zram, index); + slot_unlock(zram, index); bio_advance_iter_single(bio, &iter, bv.bv_len); } while (iter.bi_size); @@ -2641,9 +2776,9 @@ static void zram_bio_write(struct zram *zram, struct bio *bio) break; } - zram_slot_lock(zram, index); - zram_accessed(zram, index); - zram_slot_unlock(zram, 
index); + slot_lock(zram, index); + mark_slot_accessed(zram, index); + slot_unlock(zram, index); bio_advance_iter_single(bio, &iter, bv.bv_len); } while (iter.bi_size); @@ -2684,13 +2819,13 @@ static void zram_slot_free_notify(struct block_device *bdev, zram = bdev->bd_disk->private_data; atomic64_inc(&zram->stats.notify_free); - if (!zram_slot_trylock(zram, index)) { + if (!slot_trylock(zram, index)) { atomic64_inc(&zram->stats.miss_free); return; } - zram_free_page(zram, index); - zram_slot_unlock(zram, index); + slot_free(zram, index); + slot_unlock(zram, index); } static void zram_comp_params_reset(struct zram *zram) @@ -2728,7 +2863,7 @@ static void zram_destroy_comps(struct zram *zram) static void zram_reset_device(struct zram *zram) { - down_write(&zram->init_lock); + guard(rwsem_write)(&zram->dev_lock); zram->limit_pages = 0; @@ -2740,15 +2875,13 @@ static void zram_reset_device(struct zram *zram) zram->disksize = 0; zram_destroy_comps(zram); memset(&zram->stats, 0, sizeof(zram->stats)); - atomic_set(&zram->pp_in_progress, 0); reset_bdev(zram); comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); - up_write(&zram->init_lock); } -static ssize_t disksize_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) +static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) { u64 disksize; struct zcomp *comp; @@ -2760,18 +2893,15 @@ static ssize_t disksize_store(struct device *dev, if (!disksize) return -EINVAL; - down_write(&zram->init_lock); + guard(rwsem_write)(&zram->dev_lock); if (init_done(zram)) { pr_info("Cannot change disksize for initialized device\n"); - err = -EBUSY; - goto out_unlock; + return -EBUSY; } disksize = PAGE_ALIGN(disksize); - if (!zram_meta_alloc(zram, disksize)) { - err = -ENOMEM; - goto out_unlock; - } + if (!zram_meta_alloc(zram, disksize)) + return -ENOMEM; for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { if (!zram->comp_algs[prio]) @@ -2791,15 +2921,12 @@ static ssize_t disksize_store(struct device *dev, } zram->disksize = disksize; set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); - up_write(&zram->init_lock); return len; out_free_comps: zram_destroy_comps(zram); zram_meta_free(zram, disksize); -out_unlock: - up_write(&zram->init_lock); return err; } @@ -2862,6 +2989,9 @@ static const struct block_device_operations zram_devops = { .owner = THIS_MODULE }; +static DEVICE_ATTR_RO(io_stat); +static DEVICE_ATTR_RO(mm_stat); +static DEVICE_ATTR_RO(debug_stat); static DEVICE_ATTR_WO(compact); static DEVICE_ATTR_RW(disksize); static DEVICE_ATTR_RO(initstate); @@ -2871,11 +3001,13 @@ static DEVICE_ATTR_WO(mem_used_max); static DEVICE_ATTR_WO(idle); static DEVICE_ATTR_RW(comp_algorithm); #ifdef CONFIG_ZRAM_WRITEBACK +static DEVICE_ATTR_RO(bd_stat); static DEVICE_ATTR_RW(backing_dev); static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); static DEVICE_ATTR_RW(writeback_limit_enable); static DEVICE_ATTR_RW(writeback_batch_size); +static DEVICE_ATTR_RW(writeback_compressed); #endif #ifdef CONFIG_ZRAM_MULTI_COMP static DEVICE_ATTR_RW(recomp_algorithm); @@ -2893,17 +3025,16 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_idle.attr, &dev_attr_comp_algorithm.attr, #ifdef CONFIG_ZRAM_WRITEBACK + &dev_attr_bd_stat.attr, &dev_attr_backing_dev.attr, &dev_attr_writeback.attr, &dev_attr_writeback_limit.attr, &dev_attr_writeback_limit_enable.attr, &dev_attr_writeback_batch_size.attr, + &dev_attr_writeback_compressed.attr, 
#endif &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, -#ifdef CONFIG_ZRAM_WRITEBACK - &dev_attr_bd_stat.attr, -#endif &dev_attr_debug_stat.attr, #ifdef CONFIG_ZRAM_MULTI_COMP &dev_attr_recomp_algorithm.attr, @@ -2957,9 +3088,10 @@ static int zram_add(void) goto out_free_dev; device_id = ret; - init_rwsem(&zram->init_lock); + init_rwsem(&zram->dev_lock); #ifdef CONFIG_ZRAM_WRITEBACK zram->wb_batch_size = 32; + zram->wb_compressed = false; #endif /* gendisk structure */ @@ -2978,7 +3110,6 @@ static int zram_add(void) zram->disk->fops = &zram_devops; zram->disk->private_data = zram; snprintf(zram->disk->disk_name, 16, "zram%d", device_id); - atomic_set(&zram->pp_in_progress, 0); zram_comp_params_reset(zram); comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); @@ -3139,7 +3270,7 @@ static int __init zram_init(void) struct zram_table_entry zram_te; int ret; - BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8); + BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.attr.flags) * 8); ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", zcomp_cpu_up_prepare, zcomp_cpu_dead); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c6d94501376c..515a72d9c06f 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -65,10 +65,15 @@ enum zram_pageflags { */ struct zram_table_entry { unsigned long handle; - unsigned long flags; + union { + unsigned long __lock; + struct attr { + u32 flags; #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME - ktime_t ac_time; + u32 ac_time; #endif + } attr; + }; struct lockdep_map dep_map; }; @@ -106,8 +111,8 @@ struct zram { struct zcomp *comps[ZRAM_MAX_COMPS]; struct zcomp_params params[ZRAM_MAX_COMPS]; struct gendisk *disk; - /* Prevent concurrent execution of device init */ - struct rw_semaphore init_lock; + /* Locks the device either in exclusive or in shared mode */ + struct rw_semaphore dev_lock; /* * the number of pages zram can consume for storing compressed data */ @@ -128,6 +133,7 @@ struct zram { #ifdef CONFIG_ZRAM_WRITEBACK struct file *backing_dev; bool wb_limit_enable; + bool wb_compressed; u32 wb_batch_size; u64 bd_wb_limit; struct block_device *bdev; @@ -137,6 +143,5 @@ struct zram { #ifdef CONFIG_ZRAM_MEMORY_TRACKING struct dentry *debugfs_dir; #endif - atomic_t pp_in_progress; }; #endif diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index d7d41b054b98..5cc79d1517af 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -410,7 +410,7 @@ config DS1682 config VMWARE_BALLOON tristate "VMware Balloon Driver" depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST - select MEMORY_BALLOON + select BALLOON help This is VMware physical memory management driver which acts like a "balloon" that can be inflated to reclaim physical pages diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index cc1d18b3df5c..216a16395968 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -29,7 +29,7 @@ #include <linux/rwsem.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/balloon_compaction.h> +#include <linux/balloon.h> #include <linux/vmw_vmci_defs.h> #include <linux/vmw_vmci_api.h> #include <asm/hypervisor.h> @@ -354,11 +354,16 @@ struct vmballoon { /** * @huge_pages - list of the inflated 2MB pages. * - * Protected by @b_dev_info.pages_lock . + * Protected by @huge_pages_lock. */ struct list_head huge_pages; /** + * @huge_pages_lock: lock for the list of inflated 2MB pages. 
+	 */
+	spinlock_t huge_pages_lock;
+
+	/**
 	 * @vmci_doorbell.
 	 *
 	 * Protected by @conf_sem.
@@ -987,7 +992,6 @@ static void vmballoon_enqueue_page_list(struct vmballoon *b,
 					unsigned int *n_pages,
 					enum vmballoon_page_size_type page_size)
 {
-	unsigned long flags;
 	struct page *page;
 
 	if (page_size == VMW_BALLOON_4K_PAGE) {
@@ -995,9 +999,9 @@ static void vmballoon_enqueue_page_list(struct vmballoon *b,
 	} else {
 		/*
 		 * Keep the huge pages in a local list which is not available
-		 * for the balloon compaction mechanism.
+		 * for the balloon page migration.
 		 */
-		spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+		spin_lock(&b->huge_pages_lock);
 
 		list_for_each_entry(page, pages, lru) {
 			vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE);
@@ -1006,7 +1010,7 @@ static void vmballoon_enqueue_page_list(struct vmballoon *b,
 		list_splice_init(pages, &b->huge_pages);
 		__count_vm_events(BALLOON_INFLATE,
 				  *n_pages * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
-		spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
+		spin_unlock(&b->huge_pages_lock);
 	}
 
 	*n_pages = 0;
@@ -1033,7 +1037,6 @@ static void vmballoon_dequeue_page_list(struct vmballoon *b,
 {
 	struct page *page, *tmp;
 	unsigned int i = 0;
-	unsigned long flags;
 
 	/* In the case of 4k pages, use the compaction infrastructure */
 	if (page_size == VMW_BALLOON_4K_PAGE) {
@@ -1043,7 +1046,7 @@ static void vmballoon_dequeue_page_list(struct vmballoon *b,
 	}
 
 	/* 2MB pages */
-	spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+	spin_lock(&b->huge_pages_lock);
 	list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) {
 		vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE);
 
@@ -1054,7 +1057,7 @@ static void vmballoon_dequeue_page_list(struct vmballoon *b,
 
 	__count_vm_events(BALLOON_DEFLATE,
 			  i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
-	spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
+	spin_unlock(&b->huge_pages_lock);
 	*n_pages = i;
 }
 
@@ -1716,7 +1719,7 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
 
 #endif /* CONFIG_DEBUG_FS */
 
-#ifdef CONFIG_BALLOON_COMPACTION
+#ifdef CONFIG_BALLOON_MIGRATION
 /**
  * vmballoon_migratepage() - migrates a balloon page.
  * @b_dev_info: balloon device information descriptor.
@@ -1724,18 +1727,17 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
  * @page: a ballooned page that should be migrated.
  * @mode: migration mode, ignored.
  *
- * This function is really open-coded, but that is according to the interface
- * that balloon_compaction provides.
- *
  * Return: zero on success, -EAGAIN when migration cannot be performed
- *	   momentarily, and -EBUSY if migration failed and should be retried
- *	   with that specific page.
+ *	   momentarily, -EBUSY if migration failed and should be retried
+ *	   with that specific page, and -ENOENT when deflating @page
+ *	   succeeded but inflating @newpage failed, effectively deflating
+ *	   the balloon.
  */
 static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
 				 struct page *newpage, struct page *page,
 				 enum migrate_mode mode)
 {
-	unsigned long status, flags;
+	unsigned long status;
 	struct vmballoon *b;
 	int ret = 0;
 
@@ -1773,14 +1775,6 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
 		goto out_unlock;
 	}
 
-	/*
-	 * The page is isolated, so it is safe to delete it without holding
-	 * @pages_lock . We keep holding @comm_lock since we will need it in a
-	 * second.
-	 */
-	balloon_page_finalize(page);
-	put_page(page);
-
 	/* Inflate */
 	vmballoon_add_page(b, 0, newpage);
 	status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
@@ -1799,60 +1793,21 @@
 		 * change.
 		 */
 		atomic64_dec(&b->size);
-	} else {
 		/*
-		 * Success. Take a reference for the page, and we will add it to
-		 * the list after acquiring the lock.
+		 * Tell the core that we're deflating the old page and don't
+		 * need the new page.
 		 */
-		get_page(newpage);
-	}
-
-	/* Update the balloon list under the @pages_lock */
-	spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
-
-	/*
-	 * On inflation success, we already took a reference for the @newpage.
-	 * If we succeed just insert it to the list and update the statistics
-	 * under the lock.
-	 */
-	if (status == VMW_BALLOON_SUCCESS) {
-		balloon_page_insert(&b->b_dev_info, newpage);
-		__count_vm_event(BALLOON_MIGRATE);
+		ret = -ENOENT;
 	}
-
-	/*
-	 * We deflated successfully, so regardless to the inflation success, we
-	 * need to reduce the number of isolated_pages.
-	 */
-	b->b_dev_info.isolated_pages--;
-	spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
-
 out_unlock:
 	up_read(&b->conf_sem);
 	return ret;
 }
-
-/**
- * vmballoon_compaction_init() - initialized compaction for the balloon.
- *
- * @b: pointer to the balloon.
- *
- * If during the initialization a failure occurred, this function does not
- * perform cleanup. The caller must call vmballoon_compaction_deinit() in this
- * case.
- *
- * Return: zero on success or error code on failure.
- */
-static __init void vmballoon_compaction_init(struct vmballoon *b)
-{
-	b->b_dev_info.migratepage = vmballoon_migratepage;
-}
-
-#else /* CONFIG_BALLOON_COMPACTION */
-static inline void vmballoon_compaction_init(struct vmballoon *b)
-{
-}
-#endif /* CONFIG_BALLOON_COMPACTION */
+#else /* CONFIG_BALLOON_MIGRATION */
+int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
+			  struct page *newpage, struct page *page,
+			  enum migrate_mode mode);
+#endif /* CONFIG_BALLOON_MIGRATION */
 
 static int __init vmballoon_init(void)
 {
@@ -1871,14 +1826,12 @@ static int __init vmballoon_init(void)
 	if (error)
 		return error;
 
-	/*
-	 * Initialization of compaction must be done after the call to
-	 * balloon_devinfo_init() .
-	 */
 	balloon_devinfo_init(&balloon.b_dev_info);
-	vmballoon_compaction_init(&balloon);
+	if (IS_ENABLED(CONFIG_BALLOON_MIGRATION))
+		balloon.b_dev_info.migratepage = vmballoon_migratepage;
 
 	INIT_LIST_HEAD(&balloon.huge_pages);
+	spin_lock_init(&balloon.huge_pages_lock);
 	spin_lock_init(&balloon.comm_lock);
 	init_rwsem(&balloon.conf_sem);
 	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 6db5235a7693..ce5bc0d9ea28 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -112,7 +112,7 @@ config VIRTIO_PMEM
 config VIRTIO_BALLOON
 	tristate "Virtio balloon driver"
 	depends on VIRTIO
-	select MEMORY_BALLOON
+	select BALLOON
 	select PAGE_REPORTING
 	help
 	 This driver supports increasing and decreasing the amount
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 74fe59f5a78c..4e549abe59ff 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -13,7 +13,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/balloon_compaction.h>
+#include <linux/balloon.h>
 #include <linux/oom.h>
 #include <linux/wait.h>
 #include <linux/mm.h>
@@ -242,8 +242,8 @@ static void set_page_pfns(struct virtio_balloon *vb,
 static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num)
 {
 	unsigned int num_allocated_pages;
+	struct page *page, *next;
 	unsigned int num_pfns;
-	struct page *page;
 	LIST_HEAD(pages);
 
 	/* We can only do one array worth at a time. */
@@ -262,21 +262,19 @@ static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num)
 			break;
 		}
 
-		balloon_page_push(&pages, page);
+		list_add(&page->lru, &pages);
 	}
 
 	mutex_lock(&vb->balloon_lock);
 
 	vb->num_pfns = 0;
 
-	while ((page = balloon_page_pop(&pages))) {
+	list_for_each_entry_safe(page, next, &pages, lru) {
+		list_del(&page->lru);
 		balloon_page_enqueue(&vb->vb_dev_info, page);
 
 		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
 		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
-		if (!virtio_has_feature(vb->vdev,
-					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
-			adjust_managed_page_count(page, -1);
 		vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE;
 	}
 
@@ -295,9 +293,6 @@ static void release_pages_balloon(struct virtio_balloon *vb,
 	struct page *page, *next;
 
 	list_for_each_entry_safe(page, next, pages, lru) {
-		if (!virtio_has_feature(vb->vdev,
-					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
-			adjust_managed_page_count(page, 1);
 		list_del(&page->lru);
 		put_page(page); /* balloon reference */
 	}
@@ -480,15 +475,19 @@ static inline s64 towards_target(struct virtio_balloon *vb)
 static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
 					     unsigned long num_to_return)
 {
-	struct page *page;
-	unsigned long num_returned;
+	unsigned long num_returned = 0;
+	struct page *page, *next;
+
+	if (unlikely(!num_to_return))
+		return 0;
 
 	spin_lock_irq(&vb->free_page_list_lock);
-	for (num_returned = 0; num_returned < num_to_return; num_returned++) {
-		page = balloon_page_pop(&vb->free_page_list);
-		if (!page)
-			break;
+
+	list_for_each_entry_safe(page, next, &vb->free_page_list, lru) {
+		list_del(&page->lru);
 		__free_pages(page, VIRTIO_BALLOON_HINT_BLOCK_ORDER);
+		if (++num_returned == num_to_return)
+			break;
 	}
 	vb->num_free_page_blocks -= num_returned;
 	spin_unlock_irq(&vb->free_page_list_lock);
@@ -723,7 +722,7 @@ static int get_free_page_and_send(struct virtio_balloon *vb)
 	}
 	virtqueue_kick(vq);
 	spin_lock_irq(&vb->free_page_list_lock);
-	balloon_page_push(&vb->free_page_list, page);
+	list_add(&page->lru, &vb->free_page_list);
 	vb->num_free_page_blocks++;
 	spin_unlock_irq(&vb->free_page_list_lock);
 	} else {
@@ -803,7 +802,7 @@ static void report_free_page_func(struct work_struct *work)
 	}
 }
 
-#ifdef CONFIG_BALLOON_COMPACTION
+#ifdef CONFIG_BALLOON_MIGRATION
 /*
  * virtballoon_migratepage - perform the balloon page migration on behalf of
  *			     a compaction thread. (called under page lock)
@@ -827,7 +826,6 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
 {
 	struct virtio_balloon *vb = container_of(vb_dev_info,
 			struct virtio_balloon, vb_dev_info);
-	unsigned long flags;
 
 	/*
 	 * In order to avoid lock contention while migrating pages concurrently
@@ -840,25 +838,7 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
 	if (!mutex_trylock(&vb->balloon_lock))
 		return -EAGAIN;
 
-	get_page(newpage); /* balloon reference */
-
-	/*
-	 * When we migrate a page to a different zone and adjusted the
-	 * managed page count when inflating, we have to fixup the count of
-	 * both involved zones.
-	 */
-	if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM) &&
-	    page_zone(page) != page_zone(newpage)) {
-		adjust_managed_page_count(page, 1);
-		adjust_managed_page_count(newpage, -1);
-	}
-
 	/* balloon's page migration 1st step -- inflate "newpage" */
-	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
-	balloon_page_insert(vb_dev_info, newpage);
-	vb_dev_info->isolated_pages--;
-	__count_vm_event(BALLOON_MIGRATE);
-	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
 	set_page_pfns(vb, vb->pfns, newpage);
 	tell_host(vb, vb->inflate_vq);
@@ -869,13 +849,9 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
 	tell_host(vb, vb->deflate_vq);
 
 	mutex_unlock(&vb->balloon_lock);
-
-	balloon_page_finalize(page);
-	put_page(page); /* balloon reference */
-
 	return 0;
 }
-#endif /* CONFIG_BALLOON_COMPACTION */
+#endif /* CONFIG_BALLOON_MIGRATION */
 
 static unsigned long shrink_free_pages(struct virtio_balloon *vb,
 				       unsigned long pages_to_free)
@@ -970,7 +946,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	if (err)
 		goto out_free_vb;
 
-#ifdef CONFIG_BALLOON_COMPACTION
+	if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+		vb->vb_dev_info.adjust_managed_page_count = true;
+#ifdef CONFIG_BALLOON_MIGRATION
 	vb->vb_dev_info.migratepage = virtballoon_migratepage;
 #endif
 	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
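
The zram hunks above replace paired down_write()/up_write() calls on the old init_lock with guard(rwsem_write)(&zram->dev_lock), which is why disksize_store() loses its out_unlock label: the guard is built, via the kernel's linux/cleanup.h helpers, on the compiler's cleanup attribute, so the semaphore is released automatically on every return path. Below is a minimal userspace sketch of the same mechanism using a pthread rwlock; guard_write(), write_unlock_cleanup() and set_disksize() are illustrative names invented for this sketch, not kernel or libc API.

/* Scope-based write lock, a userspace approximation of guard(rwsem_write). */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t dev_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Cleanup callback: the compiler runs this when the guarded variable
 * leaves scope, regardless of which return statement was taken. */
static void write_unlock_cleanup(pthread_rwlock_t **lock)
{
	pthread_rwlock_unlock(*lock);
}

/* "Guard": take the write lock now, schedule the unlock for scope exit. */
#define guard_write(l)						\
	pthread_rwlock_t *__guard				\
	__attribute__((cleanup(write_unlock_cleanup), unused)) = \
		(pthread_rwlock_wrlock(l), (l))

static int set_disksize(unsigned long long disksize, int initialized)
{
	guard_write(&dev_lock);

	if (!disksize)
		return -1;	/* early return: unlock still runs (-EINVAL path) */
	if (initialized)
		return -2;	/* likewise, no goto out_unlock needed (-EBUSY path) */

	printf("disksize set to %llu\n", disksize);
	return 0;		/* success: unlock runs here as well */
}

int main(void)
{
	set_disksize(0, 0);		/* exercises an early-return path */
	set_disksize(1ULL << 30, 0);	/* exercises the success path */
	return 0;
}

Once unlocking is tied to scope like this, a goto-based unwind ladder has nothing left to do, which is exactly why the error paths in disksize_store() collapse into plain returns.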

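fill_balloon() and return_free_pages_to_mm() above drop balloon_page_push()/balloon_page_pop() in favour of list_add() plus list_for_each_entry_safe(), whose look-ahead cursor makes it legal to unlink, and even free, the current entry mid-walk. The following self-contained sketch reproduces that idiom; the list helpers mimic the names in the kernel's <linux/list.h> but are simplified reimplementations, and struct page here is a toy stand-in, so treat it as an illustration rather than the real API.

/* Safe list iteration with removal inside the loop, kernel-list style. */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member) container_of(ptr, type, member)
/* "safe" variant: n caches the next node before the body can unlink pos */
#define list_for_each_entry_safe(pos, n, head, member)			\
	for (pos = list_entry((head)->next, typeof(*pos), member),	\
	     n = list_entry(pos->member.next, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = n, n = list_entry(n->member.next, typeof(*n), member))

static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

static void list_del(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
	entry->next = entry->prev = NULL;
}

struct page { unsigned long pfn; struct list_head lru; };

int main(void)
{
	struct list_head pages = LIST_HEAD_INIT(pages);
	struct page *page, *next;

	for (unsigned long pfn = 0; pfn < 4; pfn++) {
		page = malloc(sizeof(*page));
		page->pfn = pfn;
		list_add(&page->lru, &pages);	/* push front, as fill_balloon() now does */
	}

	/* drain the list, freeing as we go -- this requires the _safe variant */
	list_for_each_entry_safe(page, next, &pages, lru) {
		list_del(&page->lru);
		printf("returning pfn %lu\n", page->pfn);
		free(page);
	}
	return 0;
}

A plain list_for_each_entry() would compute its next pointer from a node the body has already freed; caching it up front in next is what makes the deletion in the loop body safe.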