From bc88528cda2eddc3e5ea304fc3f147f1b4186aa4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Jan 2024 17:09:44 +0100 Subject: PM: sleep: stats: Use array of suspend step names Replace suspend_step_name() in the suspend statistics code with an array of suspend step names which has fewer lines of code and less overhead. While at it, remove two unnecessary line breaks in suspend_stats_show() and adjust some white space in there to the kernel coding style for a more consistent code layout. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Stanislaw Gruszka Reviewed-by: Ulf Hansson --- kernel/power/main.c | 50 ++++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index b1ae9b677d03..dca14543dfed 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -319,25 +319,17 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, power_attr(pm_test); #endif /* CONFIG_PM_SLEEP_DEBUG */ -static char *suspend_step_name(enum suspend_stat_step step) -{ - switch (step) { - case SUSPEND_FREEZE: - return "freeze"; - case SUSPEND_PREPARE: - return "prepare"; - case SUSPEND_SUSPEND: - return "suspend"; - case SUSPEND_SUSPEND_NOIRQ: - return "suspend_noirq"; - case SUSPEND_RESUME_NOIRQ: - return "resume_noirq"; - case SUSPEND_RESUME: - return "resume"; - default: - return ""; - } -} +static const char * const suspend_step_names[] = { + [SUSPEND_WORKING] = "", + [SUSPEND_FREEZE] = "freeze", + [SUSPEND_PREPARE] = "prepare", + [SUSPEND_SUSPEND] = "suspend", + [SUSPEND_SUSPEND_LATE] = "suspend_late", + [SUSPEND_SUSPEND_NOIRQ] = "suspend_noirq", + [SUSPEND_RESUME_NOIRQ] = "resume_noirq", + [SUSPEND_RESUME_EARLY] = "resume_early", + [SUSPEND_RESUME] = "resume", +}; #define suspend_attr(_name, format_str) \ static ssize_t _name##_show(struct kobject *kobj, \ @@ -392,16 +384,14 @@ static struct kobj_attribute last_failed_errno = __ATTR_RO(last_failed_errno); static ssize_t last_failed_step_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - int index; enum suspend_stat_step step; - char *last_failed_step = NULL; + int index; index = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; index %= REC_FAILED_NUM; step = suspend_stats.failed_steps[index]; - last_failed_step = suspend_step_name(step); - return sprintf(buf, "%s\n", last_failed_step); + return sprintf(buf, "%s\n", suspend_step_names[step]); } static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step); @@ -473,30 +463,26 @@ static int suspend_stats_show(struct seq_file *s, void *unused) "failed_resume_noirq", suspend_stats.failed_resume_noirq); seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", - suspend_stats.failed_devs[last_dev]); + suspend_stats.failed_devs[last_dev]); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_dev + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; - seq_printf(s, "\t\t\t%-s\n", - suspend_stats.failed_devs[index]); + seq_printf(s, "\t\t\t%-s\n", suspend_stats.failed_devs[index]); } seq_printf(s, " last_failed_errno:\t%-d\n", suspend_stats.errno[last_errno]); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_errno + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; - seq_printf(s, "\t\t\t%-d\n", - suspend_stats.errno[index]); + seq_printf(s, "\t\t\t%-d\n", suspend_stats.errno[index]); } seq_printf(s, " last_failed_step:\t%-s\n", - suspend_step_name( - suspend_stats.failed_steps[last_step])); + 
suspend_step_names[suspend_stats.failed_steps[last_step]]); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_step + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; seq_printf(s, "\t\t\t%-s\n", - suspend_step_name( - suspend_stats.failed_steps[index])); + suspend_step_names[suspend_stats.failed_steps[index]]); } return 0; -- cgit v1.2.3 From b730bab0b9c4204d7dda3f5bc8adf4292497fc39 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Jan 2024 17:11:57 +0100 Subject: PM: sleep: stats: Use an array of step failure counters Instead of using a set of individual struct suspend_stats fields representing suspend step failure counters, use an array of counters indexed by enum suspend_stat_step for this purpose, which allows dpm_save_failed_step() to increment the appropriate counter automatically, so that its callers don't need to do that directly. It also allows suspend_stats_show() to carry out a loop over the counters array to print their values. Because the counters cannot become negative, use unsigned int for representing them. The only user-observable impact of this change is a different ordering of entries in the suspend_stats debugfs file which is not expected to matter. Signed-off-by: Rafael J. Wysocki Reviewed-by: Stanislaw Gruszka Reviewed-by: Ulf Hansson --- kernel/power/main.c | 51 ++++++++++++++++++++++++++------------------------ kernel/power/suspend.c | 1 - 2 files changed, 27 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index dca14543dfed..d7a02105b183 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -341,18 +341,28 @@ static struct kobj_attribute _name = __ATTR_RO(_name) suspend_attr(success, "%d\n"); suspend_attr(fail, "%d\n"); -suspend_attr(failed_freeze, "%d\n"); -suspend_attr(failed_prepare, "%d\n"); -suspend_attr(failed_suspend, "%d\n"); -suspend_attr(failed_suspend_late, "%d\n"); -suspend_attr(failed_suspend_noirq, "%d\n"); -suspend_attr(failed_resume, "%d\n"); -suspend_attr(failed_resume_early, "%d\n"); -suspend_attr(failed_resume_noirq, "%d\n"); suspend_attr(last_hw_sleep, "%llu\n"); suspend_attr(total_hw_sleep, "%llu\n"); suspend_attr(max_hw_sleep, "%llu\n"); +#define suspend_step_attr(_name, step) \ +static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", \ + suspend_stats.step_failures[step-1]); \ +} \ +static struct kobj_attribute _name = __ATTR_RO(_name) + +suspend_step_attr(failed_freeze, SUSPEND_FREEZE); +suspend_step_attr(failed_prepare, SUSPEND_PREPARE); +suspend_step_attr(failed_suspend, SUSPEND_SUSPEND); +suspend_step_attr(failed_suspend_late, SUSPEND_SUSPEND_LATE); +suspend_step_attr(failed_suspend_noirq, SUSPEND_SUSPEND_NOIRQ); +suspend_step_attr(failed_resume, SUSPEND_RESUME); +suspend_step_attr(failed_resume_early, SUSPEND_RESUME_EARLY); +suspend_step_attr(failed_resume_noirq, SUSPEND_RESUME_NOIRQ); + static ssize_t last_failed_dev_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -439,6 +449,7 @@ static const struct attribute_group suspend_attr_group = { static int suspend_stats_show(struct seq_file *s, void *unused) { int i, index, last_dev, last_errno, last_step; + enum suspend_stat_step step; last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; last_dev %= REC_FAILED_NUM; @@ -446,22 +457,14 @@ static int suspend_stats_show(struct seq_file *s, void *unused) last_errno %= REC_FAILED_NUM; last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; last_step %= 
REC_FAILED_NUM; - seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n" - "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n", - "success", suspend_stats.success, - "fail", suspend_stats.fail, - "failed_freeze", suspend_stats.failed_freeze, - "failed_prepare", suspend_stats.failed_prepare, - "failed_suspend", suspend_stats.failed_suspend, - "failed_suspend_late", - suspend_stats.failed_suspend_late, - "failed_suspend_noirq", - suspend_stats.failed_suspend_noirq, - "failed_resume", suspend_stats.failed_resume, - "failed_resume_early", - suspend_stats.failed_resume_early, - "failed_resume_noirq", - suspend_stats.failed_resume_noirq); + + seq_printf(s, "success: %d\nfail: %d\n", + suspend_stats.success, suspend_stats.fail); + + for (step = SUSPEND_FREEZE; step <= SUSPEND_NR_STEPS; step++) + seq_printf(s, "failed_%s: %u\n", suspend_step_names[step], + suspend_stats.step_failures[step-1]); + seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", suspend_stats.failed_devs[last_dev]); for (i = 1; i < REC_FAILED_NUM; i++) { diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index fa3bf161d13f..07bde5bba49e 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -367,7 +367,6 @@ static int suspend_prepare(suspend_state_t state) if (!error) return 0; - suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); pm_notifier_call_chain(PM_POST_SUSPEND); Restore: -- cgit v1.2.3 From 2231f78d3e15e45abe534db1997bc6a2153dc01c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Jan 2024 17:13:14 +0100 Subject: PM: sleep: stats: Use unsigned int for success and failure counters Change the type of the "success" and "fail" fields in struct suspend_stats to unsigned int, because they cannot be negative. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Stanislaw Gruszka Reviewed-by: Ulf Hansson --- kernel/power/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index d7a02105b183..d6b4a9258288 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -339,8 +339,8 @@ static ssize_t _name##_show(struct kobject *kobj, \ } \ static struct kobj_attribute _name = __ATTR_RO(_name) -suspend_attr(success, "%d\n"); -suspend_attr(fail, "%d\n"); +suspend_attr(success, "%u\n"); +suspend_attr(fail, "%u\n"); suspend_attr(last_hw_sleep, "%llu\n"); suspend_attr(total_hw_sleep, "%llu\n"); suspend_attr(max_hw_sleep, "%llu\n"); @@ -458,7 +458,7 @@ static int suspend_stats_show(struct seq_file *s, void *unused) last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; last_step %= REC_FAILED_NUM; - seq_printf(s, "success: %d\nfail: %d\n", + seq_printf(s, "success: %u\nfail: %u\n", suspend_stats.success, suspend_stats.fail); for (step = SUSPEND_FREEZE; step <= SUSPEND_NR_STEPS; step++) -- cgit v1.2.3 From 9ff544fa5f94fe07f99a36d2138075b322067546 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Jan 2024 17:30:44 +0100 Subject: PM: sleep: stats: Define suspend_stats next to the code using it It is not necessary to define struct suspend_stats in a header file and the suspend_stats variable in the core device system-wide PM code. They both can be defined in kernel/power/main.c, next to the sysfs and debugfs code accessing suspend_stats, which can be static. Modify the code in question in accordance with the above observation and replace the static inline functions manipulating suspend_stats with regular ones defined in kernel/power/main.c. 
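(An aside on the REC_FAILED_NUM ring buffers maintained by the dpm_save_failed_*() helpers in the diff below — the sketch that follows is illustrative and not part of the patch. Each last_failed_* counter points at the slot the next record will overwrite, so the most recent record sits one slot behind it:

	/* Illustrative only: reading back a two-entry failure ring buffer. */
	#define REC_FAILED_NUM	2

	static int most_recent_slot(int next_slot)
	{
		/* next_slot == 0 -> (0 + 2 - 1) % 2 == 1, the latest write */
		return (next_slot + REC_FAILED_NUM - 1) % REC_FAILED_NUM;
	}

Older records follow by stepping back with (slot + REC_FAILED_NUM - i) % REC_FAILED_NUM for i = 1, 2, ..., which is exactly the loop suspend_stats_show() runs.)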
While at it, move the enum suspend_stat_step to the end of suspend.h which is a more suitable place for it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson --- kernel/power/main.c | 76 +++++++++++++++++++++++++++++++++++++++++--------- kernel/power/power.h | 2 ++ kernel/power/suspend.c | 7 +---- 3 files changed, 66 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index d6b4a9258288..8c4bf5a54805 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -95,19 +95,6 @@ int unregister_pm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_pm_notifier); -void pm_report_hw_sleep_time(u64 t) -{ - suspend_stats.last_hw_sleep = t; - suspend_stats.total_hw_sleep += t; -} -EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time); - -void pm_report_max_hw_sleep(u64 t) -{ - suspend_stats.max_hw_sleep = t; -} -EXPORT_SYMBOL_GPL(pm_report_max_hw_sleep); - int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down) { int ret; @@ -319,6 +306,69 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, power_attr(pm_test); #endif /* CONFIG_PM_SLEEP_DEBUG */ +#define SUSPEND_NR_STEPS SUSPEND_RESUME +#define REC_FAILED_NUM 2 + +struct suspend_stats { + unsigned int step_failures[SUSPEND_NR_STEPS]; + unsigned int success; + unsigned int fail; + int last_failed_dev; + char failed_devs[REC_FAILED_NUM][40]; + int last_failed_errno; + int errno[REC_FAILED_NUM]; + int last_failed_step; + u64 last_hw_sleep; + u64 total_hw_sleep; + u64 max_hw_sleep; + enum suspend_stat_step failed_steps[REC_FAILED_NUM]; +}; + +static struct suspend_stats suspend_stats; + +void dpm_save_failed_dev(const char *name) +{ + strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], + name, sizeof(suspend_stats.failed_devs[0])); + suspend_stats.last_failed_dev++; + suspend_stats.last_failed_dev %= REC_FAILED_NUM; +} + +void dpm_save_failed_step(enum suspend_stat_step step) +{ + suspend_stats.step_failures[step-1]++; + suspend_stats.failed_steps[suspend_stats.last_failed_step] = step; + suspend_stats.last_failed_step++; + suspend_stats.last_failed_step %= REC_FAILED_NUM; +} + +void dpm_save_errno(int err) +{ + if (!err) { + suspend_stats.success++; + return; + } + + suspend_stats.fail++; + + suspend_stats.errno[suspend_stats.last_failed_errno] = err; + suspend_stats.last_failed_errno++; + suspend_stats.last_failed_errno %= REC_FAILED_NUM; +} + +void pm_report_hw_sleep_time(u64 t) +{ + suspend_stats.last_hw_sleep = t; + suspend_stats.total_hw_sleep += t; +} +EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time); + +void pm_report_max_hw_sleep(u64 t) +{ + suspend_stats.max_hw_sleep = t; +} +EXPORT_SYMBOL_GPL(pm_report_max_hw_sleep); + static const char * const suspend_step_names[] = { [SUSPEND_WORKING] = "", [SUSPEND_FREEZE] = "freeze", diff --git a/kernel/power/power.h b/kernel/power/power.h index 8499a39c62f4..4e03046b9c4d 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -327,3 +327,5 @@ static inline void pm_sleep_enable_secondary_cpus(void) suspend_enable_secondary_cpus(); cpuidle_resume(); } + +void dpm_save_errno(int err); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 07bde5bba49e..742eb26618cc 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -616,12 +616,7 @@ int pm_suspend(suspend_state_t state) pr_info("suspend entry (%s)\n", mem_sleep_labels[state]); error = enter_state(state); - if (error) { - suspend_stats.fail++; - 
dpm_save_failed_errno(error); - } else { - suspend_stats.success++; - } + dpm_save_errno(error); pr_info("suspend exit\n"); return error; } -- cgit v1.2.3 From a6d38e991dc4f1b4a86137177435660df53951c5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Jan 2024 17:24:30 +0100 Subject: PM: sleep: stats: Use locking in dpm_save_failed_dev() Because dpm_save_failed_dev() may be called simultaneously by multiple failing device PM functions, the state of the suspend_stats fields updated by it may become inconsistent. Prevent that from happening by using a lock in dpm_save_failed_dev(). Signed-off-by: Rafael J. Wysocki Reviewed-by: Stanislaw Gruszka Reviewed-by: Ulf Hansson --- kernel/power/main.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index 8c4bf5a54805..a9e0693aaf69 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -325,13 +325,18 @@ struct suspend_stats { }; static struct suspend_stats suspend_stats; +static DEFINE_MUTEX(suspend_stats_lock); void dpm_save_failed_dev(const char *name) { + mutex_lock(&suspend_stats_lock); + strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], name, sizeof(suspend_stats.failed_devs[0])); suspend_stats.last_failed_dev++; suspend_stats.last_failed_dev %= REC_FAILED_NUM; + + mutex_unlock(&suspend_stats_lock); } void dpm_save_failed_step(enum suspend_stat_step step) -- cgit v1.2.3 From 89a807625f9701154167bf6bf136adfa1be4d849 Mon Sep 17 00:00:00 2001 From: Nikhil V Date: Mon, 22 Jan 2024 18:45:25 +0530 Subject: PM: hibernate: Rename lzo* to make it generic Renaming lzo* to generic names, except for lzo_xxx() APIs. This is used in the next patch where we move to crypto based APIs for compression. There are no functional changes introduced by this approach. Signed-off-by: Nikhil V Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 120 ++++++++++++++++++++++++++-------------------------- 1 file changed, 60 insertions(+), 60 deletions(-) (limited to 'kernel') diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 6053ddddaf65..35c62f91c13b 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -515,23 +515,23 @@ static int swap_writer_finish(struct swap_map_handle *handle, } /* We need to remember how much compressed data we need to read. */ -#define LZO_HEADER sizeof(size_t) +#define CMP_HEADER sizeof(size_t) /* Number of pages/bytes we'll compress at one time. */ -#define LZO_UNC_PAGES 32 -#define LZO_UNC_SIZE (LZO_UNC_PAGES * PAGE_SIZE) +#define UNC_PAGES 32 +#define UNC_SIZE (UNC_PAGES * PAGE_SIZE) -/* Number of pages/bytes we need for compressed data (worst case). */ -#define LZO_CMP_PAGES DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \ - LZO_HEADER, PAGE_SIZE) -#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE) +/* Number of pages we need for compressed data (worst case). */ +#define CMP_PAGES DIV_ROUND_UP(lzo1x_worst_compress(UNC_SIZE) + \ + CMP_HEADER, PAGE_SIZE) +#define CMP_SIZE (CMP_PAGES * PAGE_SIZE) /* Maximum number of threads for compression/decompression. */ -#define LZO_THREADS 3 +#define CMP_THREADS 3 /* Minimum/maximum number of pages for read buffering. 
*/ -#define LZO_MIN_RD_PAGES 1024 -#define LZO_MAX_RD_PAGES 8192 +#define CMP_MIN_RD_PAGES 1024 +#define CMP_MAX_RD_PAGES 8192 /** @@ -593,8 +593,8 @@ struct crc_data { wait_queue_head_t go; /* start crc update */ wait_queue_head_t done; /* crc update done */ u32 *crc32; /* points to handle's crc32 */ - size_t *unc_len[LZO_THREADS]; /* uncompressed lengths */ - unsigned char *unc[LZO_THREADS]; /* uncompressed data */ + size_t *unc_len[CMP_THREADS]; /* uncompressed lengths */ + unsigned char *unc[CMP_THREADS]; /* uncompressed data */ }; /* @@ -625,7 +625,7 @@ static int crc32_threadfn(void *data) return 0; } /* - * Structure used for LZO data compression. + * Structure used for data compression. */ struct cmp_data { struct task_struct *thr; /* thread */ @@ -636,15 +636,15 @@ struct cmp_data { wait_queue_head_t done; /* compression done */ size_t unc_len; /* uncompressed length */ size_t cmp_len; /* compressed length */ - unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */ - unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */ + unsigned char unc[UNC_SIZE]; /* uncompressed buffer */ + unsigned char cmp[CMP_SIZE]; /* compressed buffer */ unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */ }; /* * Compression function that runs in its own thread. */ -static int lzo_compress_threadfn(void *data) +static int compress_threadfn(void *data) { struct cmp_data *d = data; @@ -661,7 +661,7 @@ static int lzo_compress_threadfn(void *data) atomic_set(&d->ready, 0); d->ret = lzo1x_1_compress(d->unc, d->unc_len, - d->cmp + LZO_HEADER, &d->cmp_len, + d->cmp + CMP_HEADER, &d->cmp_len, d->wrk); atomic_set_release(&d->stop, 1); wake_up(&d->done); @@ -670,14 +670,14 @@ static int lzo_compress_threadfn(void *data) } /** - * save_image_lzo - Save the suspend image data compressed with LZO. + * save_compressed_image - Save the suspend image data after compression. * @handle: Swap map handle to use for saving the image. * @snapshot: Image to read data from. * @nr_to_write: Number of pages to save. */ -static int save_image_lzo(struct swap_map_handle *handle, - struct snapshot_handle *snapshot, - unsigned int nr_to_write) +static int save_compressed_image(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_write) { unsigned int m; int ret = 0; @@ -699,18 +699,18 @@ static int save_image_lzo(struct swap_map_handle *handle, * footprint. 
*/ nr_threads = num_online_cpus() - 1; - nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); + nr_threads = clamp_val(nr_threads, 1, CMP_THREADS); page = (void *)__get_free_page(GFP_NOIO | __GFP_HIGH); if (!page) { - pr_err("Failed to allocate LZO page\n"); + pr_err("Failed to allocate compression page\n"); ret = -ENOMEM; goto out_clean; } data = vzalloc(array_size(nr_threads, sizeof(*data))); if (!data) { - pr_err("Failed to allocate LZO data\n"); + pr_err("Failed to allocate compression data\n"); ret = -ENOMEM; goto out_clean; } @@ -729,7 +729,7 @@ static int save_image_lzo(struct swap_map_handle *handle, init_waitqueue_head(&data[thr].go); init_waitqueue_head(&data[thr].done); - data[thr].thr = kthread_run(lzo_compress_threadfn, + data[thr].thr = kthread_run(compress_threadfn, &data[thr], "image_compress/%u", thr); if (IS_ERR(data[thr].thr)) { @@ -777,7 +777,7 @@ static int save_image_lzo(struct swap_map_handle *handle, start = ktime_get(); for (;;) { for (thr = 0; thr < nr_threads; thr++) { - for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { + for (off = 0; off < UNC_SIZE; off += PAGE_SIZE) { ret = snapshot_read_next(snapshot); if (ret < 0) goto out_finish; @@ -817,14 +817,14 @@ static int save_image_lzo(struct swap_map_handle *handle, ret = data[thr].ret; if (ret < 0) { - pr_err("LZO compression failed\n"); + pr_err("compression failed\n"); goto out_finish; } if (unlikely(!data[thr].cmp_len || data[thr].cmp_len > lzo1x_worst_compress(data[thr].unc_len))) { - pr_err("Invalid LZO compressed length\n"); + pr_err("Invalid compressed length\n"); ret = -1; goto out_finish; } @@ -840,7 +840,7 @@ static int save_image_lzo(struct swap_map_handle *handle, * read it. */ for (off = 0; - off < LZO_HEADER + data[thr].cmp_len; + off < CMP_HEADER + data[thr].cmp_len; off += PAGE_SIZE) { memcpy(page, data[thr].cmp + off, PAGE_SIZE); @@ -942,7 +942,7 @@ int swsusp_write(unsigned int flags) if (!error) { error = (flags & SF_NOCOMPRESS_MODE) ? save_image(&handle, &snapshot, pages - 1) : - save_image_lzo(&handle, &snapshot, pages - 1); + save_compressed_image(&handle, &snapshot, pages - 1); } out_finish: error = swap_writer_finish(&handle, flags, error); @@ -1109,7 +1109,7 @@ static int load_image(struct swap_map_handle *handle, } /* - * Structure used for LZO data decompression. + * Structure used for data decompression. */ struct dec_data { struct task_struct *thr; /* thread */ @@ -1120,14 +1120,14 @@ struct dec_data { wait_queue_head_t done; /* decompression done */ size_t unc_len; /* uncompressed length */ size_t cmp_len; /* compressed length */ - unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */ - unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */ + unsigned char unc[UNC_SIZE]; /* uncompressed buffer */ + unsigned char cmp[CMP_SIZE]; /* compressed buffer */ }; /* * Decompression function that runs in its own thread. 
*/ -static int lzo_decompress_threadfn(void *data) +static int decompress_threadfn(void *data) { struct dec_data *d = data; @@ -1143,9 +1143,9 @@ static int lzo_decompress_threadfn(void *data) } atomic_set(&d->ready, 0); - d->unc_len = LZO_UNC_SIZE; - d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len, - d->unc, &d->unc_len); + d->unc_len = UNC_SIZE; + d->ret = lzo1x_decompress_safe(d->cmp + CMP_HEADER, d->cmp_len, + d->unc, &d->unc_len); if (clean_pages_on_decompress) flush_icache_range((unsigned long)d->unc, (unsigned long)d->unc + d->unc_len); @@ -1157,14 +1157,14 @@ static int lzo_decompress_threadfn(void *data) } /** - * load_image_lzo - Load compressed image data and decompress them with LZO. + * load_compressed_image - Load compressed image data and decompress it. * @handle: Swap map handle to use for loading data. * @snapshot: Image to copy uncompressed data into. * @nr_to_read: Number of pages to load. */ -static int load_image_lzo(struct swap_map_handle *handle, - struct snapshot_handle *snapshot, - unsigned int nr_to_read) +static int load_compressed_image(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_read) { unsigned int m; int ret = 0; @@ -1189,18 +1189,18 @@ static int load_image_lzo(struct swap_map_handle *handle, * footprint. */ nr_threads = num_online_cpus() - 1; - nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); + nr_threads = clamp_val(nr_threads, 1, CMP_THREADS); - page = vmalloc(array_size(LZO_MAX_RD_PAGES, sizeof(*page))); + page = vmalloc(array_size(CMP_MAX_RD_PAGES, sizeof(*page))); if (!page) { - pr_err("Failed to allocate LZO page\n"); + pr_err("Failed to allocate compression page\n"); ret = -ENOMEM; goto out_clean; } data = vzalloc(array_size(nr_threads, sizeof(*data))); if (!data) { - pr_err("Failed to allocate LZO data\n"); + pr_err("Failed to allocate compression data\n"); ret = -ENOMEM; goto out_clean; } @@ -1221,7 +1221,7 @@ static int load_image_lzo(struct swap_map_handle *handle, init_waitqueue_head(&data[thr].go); init_waitqueue_head(&data[thr].done); - data[thr].thr = kthread_run(lzo_decompress_threadfn, + data[thr].thr = kthread_run(decompress_threadfn, &data[thr], "image_decompress/%u", thr); if (IS_ERR(data[thr].thr)) { @@ -1262,18 +1262,18 @@ static int load_image_lzo(struct swap_map_handle *handle, */ if (low_free_pages() > snapshot_get_image_size()) read_pages = (low_free_pages() - snapshot_get_image_size()) / 2; - read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES); + read_pages = clamp_val(read_pages, CMP_MIN_RD_PAGES, CMP_MAX_RD_PAGES); for (i = 0; i < read_pages; i++) { - page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? + page[i] = (void *)__get_free_page(i < CMP_PAGES ? 
GFP_NOIO | __GFP_HIGH : GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); if (!page[i]) { - if (i < LZO_CMP_PAGES) { + if (i < CMP_PAGES) { ring_size = i; - pr_err("Failed to allocate LZO pages\n"); + pr_err("Failed to allocate compression pages\n"); ret = -ENOMEM; goto out_clean; } else { @@ -1344,13 +1344,13 @@ static int load_image_lzo(struct swap_map_handle *handle, data[thr].cmp_len = *(size_t *)page[pg]; if (unlikely(!data[thr].cmp_len || data[thr].cmp_len > - lzo1x_worst_compress(LZO_UNC_SIZE))) { - pr_err("Invalid LZO compressed length\n"); + lzo1x_worst_compress(UNC_SIZE))) { + pr_err("Invalid compressed length\n"); ret = -1; goto out_finish; } - need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER, + need = DIV_ROUND_UP(data[thr].cmp_len + CMP_HEADER, PAGE_SIZE); if (need > have) { if (eof > 1) { @@ -1361,7 +1361,7 @@ static int load_image_lzo(struct swap_map_handle *handle, } for (off = 0; - off < LZO_HEADER + data[thr].cmp_len; + off < CMP_HEADER + data[thr].cmp_len; off += PAGE_SIZE) { memcpy(data[thr].cmp + off, page[pg], PAGE_SIZE); @@ -1378,7 +1378,7 @@ static int load_image_lzo(struct swap_map_handle *handle, /* * Wait for more data while we are decompressing. */ - if (have < LZO_CMP_PAGES && asked) { + if (have < CMP_PAGES && asked) { ret = hib_wait_io(&hb); if (ret) goto out_finish; @@ -1396,14 +1396,14 @@ static int load_image_lzo(struct swap_map_handle *handle, ret = data[thr].ret; if (ret < 0) { - pr_err("LZO decompression failed\n"); + pr_err("decompression failed\n"); goto out_finish; } if (unlikely(!data[thr].unc_len || - data[thr].unc_len > LZO_UNC_SIZE || - data[thr].unc_len & (PAGE_SIZE - 1))) { - pr_err("Invalid LZO uncompressed length\n"); + data[thr].unc_len > UNC_SIZE || + data[thr].unc_len & (PAGE_SIZE - 1))) { + pr_err("Invalid uncompressed length\n"); ret = -1; goto out_finish; } @@ -1500,7 +1500,7 @@ int swsusp_read(unsigned int *flags_p) if (!error) { error = (*flags_p & SF_NOCOMPRESS_MODE) ? load_image(&handle, &snapshot, header->pages - 1) : - load_image_lzo(&handle, &snapshot, header->pages - 1); + load_compressed_image(&handle, &snapshot, header->pages - 1); } swap_reader_finish(&handle); end: -- cgit v1.2.3 From a06c6f5d3cc90b3b070d7b99979d57238db77a86 Mon Sep 17 00:00:00 2001 From: Nikhil V Date: Mon, 22 Jan 2024 18:45:26 +0530 Subject: PM: hibernate: Move to crypto APIs for LZO compression Currently for hibernation, LZO is the only compression algorithm available and uses the existing LZO library calls. However, there is no flexibility to switch to other algorithms which provides better results. The main idea is that different compression algorithms have different characteristics and hibernation may benefit when it uses alternate algorithms. By moving to crypto based APIs, it lays a foundation to use other compression algorithms for hibernation. There are no functional changes introduced by this approach. Signed-off-by: Nikhil V Signed-off-by: Rafael J. 
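(Before the diff, a minimal sketch of the legacy crypto compression API this patch switches to. The example_compress() helper is invented for illustration, error handling is trimmed, and "lzo" is simply the algorithm name string this series uses:)

	#include <linux/crypto.h>

	/* Sketch only: one-shot compression via the crypto_comp API. */
	static int example_compress(const u8 *src, unsigned int src_len,
				    u8 *dst, unsigned int *dst_len)
	{
		struct crypto_comp *cc;
		int ret;

		/* crypto_has_comp() returns 1 if the algorithm is usable */
		if (crypto_has_comp("lzo", 0, 0) != 1)
			return -EOPNOTSUPP;

		cc = crypto_alloc_comp("lzo", 0, 0);
		if (IS_ERR_OR_NULL(cc))
			return -EFAULT;

		/* *dst_len is in/out: capacity on entry, bytes used on exit */
		ret = crypto_comp_compress(cc, src, src_len, dst, dst_len);

		crypto_free_comp(cc);
		return ret;
	}

Decompression is symmetric via crypto_comp_decompress(), as the hunks below show.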
Wysocki --- kernel/power/Kconfig | 21 +++++++++- kernel/power/hibernate.c | 33 ++++++++++++++++ kernel/power/power.h | 5 +++ kernel/power/swap.c | 101 +++++++++++++++++++++++++++++++++++------------ 4 files changed, 132 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 4b31629c5be4..d4167159bae8 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -39,9 +39,9 @@ config HIBERNATION bool "Hibernation (aka 'suspend to disk')" depends on SWAP && ARCH_HIBERNATION_POSSIBLE select HIBERNATE_CALLBACKS - select LZO_COMPRESS - select LZO_DECOMPRESS select CRC32 + select CRYPTO + select CRYPTO_LZO help Enable the suspend to disk (STD) functionality, which is usually called "hibernation" in user interfaces. STD checkpoints the @@ -92,6 +92,23 @@ config HIBERNATION_SNAPSHOT_DEV If in doubt, say Y. +choice + prompt "Default compressor" + default HIBERNATION_COMP_LZO + depends on HIBERNATION + +config HIBERNATION_COMP_LZO + bool "lzo" + depends on CRYPTO_LZO + +endchoice + +config HIBERNATION_DEF_COMP + string + default "lzo" if HIBERNATION_COMP_LZO + help + Default compressor to be used for hibernation. + config PM_STD_PARTITION string "Default resume partition" depends on HIBERNATION diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 4b0b7cf2e019..76b7ff619c90 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -47,6 +47,15 @@ dev_t swsusp_resume_device; sector_t swsusp_resume_block; __visible int in_suspend __nosavedata; +static const char *default_compressor = CONFIG_HIBERNATION_DEF_COMP; + +/* + * Compression/decompression algorithm to be used while saving/loading + * image to/from disk. This would later be used in 'kernel/power/swap.c' + * to allocate comp streams. + */ +char hib_comp_algo[CRYPTO_MAX_ALG_NAME]; + enum { HIBERNATION_INVALID, HIBERNATION_PLATFORM, @@ -732,6 +741,17 @@ int hibernate(void) return -EPERM; } + /* + * Query for the compression algorithm support if compression is enabled. + */ + if (!nocompress) { + strscpy(hib_comp_algo, default_compressor, sizeof(hib_comp_algo)); + if (crypto_has_comp(hib_comp_algo, 0, 0) != 1) { + pr_err("%s compression is not available\n", hib_comp_algo); + return -EOPNOTSUPP; + } + } + sleep_flags = lock_system_sleep(); /* The snapshot device should not be opened while we're running */ if (!hibernate_acquire()) { @@ -955,6 +975,19 @@ static int software_resume(void) if (error) goto Unlock; + /* + * Check if the hibernation image is compressed. If so, query for + * the algorithm support. 
+ */ + if (!(swsusp_header_flags & SF_NOCOMPRESS_MODE)) { + strscpy(hib_comp_algo, default_compressor, sizeof(hib_comp_algo)); + if (crypto_has_comp(hib_comp_algo, 0, 0) != 1) { + pr_err("%s compression is not available\n", hib_comp_algo); + error = -EOPNOTSUPP; + goto Unlock; + } + } + /* The snapshot device should not be opened while we're running */ if (!hibernate_acquire()) { error = -EBUSY; diff --git a/kernel/power/power.h b/kernel/power/power.h index 4e03046b9c4d..5efa2c987057 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -6,6 +6,7 @@ #include #include #include +#include struct swsusp_info { struct new_utsname uts; @@ -54,6 +55,10 @@ asmlinkage int swsusp_save(void); /* kernel/power/hibernate.c */ extern bool freezer_test_done; +extern char hib_comp_algo[CRYPTO_MAX_ALG_NAME]; + +/* kernel/power/swap.c */ +extern unsigned int swsusp_header_flags; extern int hibernation_snapshot(int platform_mode); extern int hibernation_restore(int platform_mode); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 35c62f91c13b..6513035f2f7f 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -339,6 +338,13 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) return error; } +/* + * Hold the swsusp_header flag. This is used in software_resume() in + * 'kernel/power/hibernate' to check if the image is compressed and query + * for the compression algorithm support(if so). + */ +unsigned int swsusp_header_flags; + /** * swsusp_swap_check - check if the resume device is a swap device * and get its index (if so) @@ -514,6 +520,12 @@ static int swap_writer_finish(struct swap_map_handle *handle, return error; } +/* + * Bytes we need for compressed data in worst case. We assume(limitation) + * this is the worst of all the compression algorithms. + */ +#define bytes_worst_compress(x) ((x) + ((x) / 16) + 64 + 3 + 2) + /* We need to remember how much compressed data we need to read. */ #define CMP_HEADER sizeof(size_t) @@ -522,7 +534,7 @@ static int swap_writer_finish(struct swap_map_handle *handle, #define UNC_SIZE (UNC_PAGES * PAGE_SIZE) /* Number of pages we need for compressed data (worst case). */ -#define CMP_PAGES DIV_ROUND_UP(lzo1x_worst_compress(UNC_SIZE) + \ +#define CMP_PAGES DIV_ROUND_UP(bytes_worst_compress(UNC_SIZE) + \ CMP_HEADER, PAGE_SIZE) #define CMP_SIZE (CMP_PAGES * PAGE_SIZE) @@ -533,7 +545,6 @@ static int swap_writer_finish(struct swap_map_handle *handle, #define CMP_MIN_RD_PAGES 1024 #define CMP_MAX_RD_PAGES 8192 - /** * save_image - save the suspend image data */ @@ -629,6 +640,7 @@ static int crc32_threadfn(void *data) */ struct cmp_data { struct task_struct *thr; /* thread */ + struct crypto_comp *cc; /* crypto compressor stream */ atomic_t ready; /* ready to start flag */ atomic_t stop; /* ready to stop flag */ int ret; /* return code */ @@ -638,15 +650,18 @@ struct cmp_data { size_t cmp_len; /* compressed length */ unsigned char unc[UNC_SIZE]; /* uncompressed buffer */ unsigned char cmp[CMP_SIZE]; /* compressed buffer */ - unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */ }; +/* Indicates the image size after compression */ +static atomic_t compressed_size = ATOMIC_INIT(0); + /* * Compression function that runs in its own thread. 
*/ static int compress_threadfn(void *data) { struct cmp_data *d = data; + unsigned int cmp_len = 0; while (1) { wait_event(d->go, atomic_read_acquire(&d->ready) || @@ -660,9 +675,13 @@ static int compress_threadfn(void *data) } atomic_set(&d->ready, 0); - d->ret = lzo1x_1_compress(d->unc, d->unc_len, - d->cmp + CMP_HEADER, &d->cmp_len, - d->wrk); + cmp_len = CMP_SIZE - CMP_HEADER; + d->ret = crypto_comp_compress(d->cc, d->unc, d->unc_len, + d->cmp + CMP_HEADER, + &cmp_len); + d->cmp_len = cmp_len; + + atomic_set(&compressed_size, atomic_read(&compressed_size) + d->cmp_len); atomic_set_release(&d->stop, 1); wake_up(&d->done); } @@ -694,6 +713,8 @@ static int save_compressed_image(struct swap_map_handle *handle, hib_init_batch(&hb); + atomic_set(&compressed_size, 0); + /* * We'll limit the number of threads for compression to limit memory * footprint. @@ -703,14 +724,14 @@ static int save_compressed_image(struct swap_map_handle *handle, page = (void *)__get_free_page(GFP_NOIO | __GFP_HIGH); if (!page) { - pr_err("Failed to allocate compression page\n"); + pr_err("Failed to allocate %s page\n", hib_comp_algo); ret = -ENOMEM; goto out_clean; } data = vzalloc(array_size(nr_threads, sizeof(*data))); if (!data) { - pr_err("Failed to allocate compression data\n"); + pr_err("Failed to allocate %s data\n", hib_comp_algo); ret = -ENOMEM; goto out_clean; } @@ -729,6 +750,13 @@ static int save_compressed_image(struct swap_map_handle *handle, init_waitqueue_head(&data[thr].go); init_waitqueue_head(&data[thr].done); + data[thr].cc = crypto_alloc_comp(hib_comp_algo, 0, 0); + if (IS_ERR_OR_NULL(data[thr].cc)) { + pr_err("Could not allocate comp stream %ld\n", PTR_ERR(data[thr].cc)); + ret = -EFAULT; + goto out_clean; + } + data[thr].thr = kthread_run(compress_threadfn, &data[thr], "image_compress/%u", thr); @@ -767,7 +795,7 @@ static int save_compressed_image(struct swap_map_handle *handle, */ handle->reqd_free_pages = reqd_free_pages(); - pr_info("Using %u thread(s) for compression\n", nr_threads); + pr_info("Using %u thread(s) for %s compression\n", nr_threads, hib_comp_algo); pr_info("Compressing and saving image data (%u pages)...\n", nr_to_write); m = nr_to_write / 10; @@ -817,14 +845,14 @@ static int save_compressed_image(struct swap_map_handle *handle, ret = data[thr].ret; if (ret < 0) { - pr_err("compression failed\n"); + pr_err("%s compression failed\n", hib_comp_algo); goto out_finish; } if (unlikely(!data[thr].cmp_len || data[thr].cmp_len > - lzo1x_worst_compress(data[thr].unc_len))) { - pr_err("Invalid compressed length\n"); + bytes_worst_compress(data[thr].unc_len))) { + pr_err("Invalid %s compressed length\n", hib_comp_algo); ret = -1; goto out_finish; } @@ -862,6 +890,9 @@ out_finish: if (!ret) pr_info("Image saving done\n"); swsusp_show_speed(start, stop, nr_to_write, "Wrote"); + pr_info("Image size after compression: %d kbytes\n", + (atomic_read(&compressed_size) / 1024)); + out_clean: hib_finish_batch(&hb); if (crc) { @@ -870,9 +901,12 @@ out_clean: kfree(crc); } if (data) { - for (thr = 0; thr < nr_threads; thr++) + for (thr = 0; thr < nr_threads; thr++) { if (data[thr].thr) kthread_stop(data[thr].thr); + if (data[thr].cc) + crypto_free_comp(data[thr].cc); + } vfree(data); } if (page) free_page((unsigned long)page); @@ -1113,6 +1147,7 @@ static int load_image(struct swap_map_handle *handle, */ struct dec_data { struct task_struct *thr; /* thread */ + struct crypto_comp *cc; /* crypto compressor stream */ atomic_t ready; /* ready to start flag */ atomic_t stop; /* ready to stop flag */ 
int ret; /* return code */ @@ -1130,6 +1165,7 @@ struct dec_data { static int decompress_threadfn(void *data) { struct dec_data *d = data; + unsigned int unc_len = 0; while (1) { wait_event(d->go, atomic_read_acquire(&d->ready) || @@ -1143,9 +1179,11 @@ static int decompress_threadfn(void *data) } atomic_set(&d->ready, 0); - d->unc_len = UNC_SIZE; - d->ret = lzo1x_decompress_safe(d->cmp + CMP_HEADER, d->cmp_len, - d->unc, &d->unc_len); + unc_len = UNC_SIZE; + d->ret = crypto_comp_decompress(d->cc, d->cmp + CMP_HEADER, d->cmp_len, + d->unc, &unc_len); + d->unc_len = unc_len; + if (clean_pages_on_decompress) flush_icache_range((unsigned long)d->unc, (unsigned long)d->unc + d->unc_len); @@ -1193,14 +1231,14 @@ static int load_compressed_image(struct swap_map_handle *handle, page = vmalloc(array_size(CMP_MAX_RD_PAGES, sizeof(*page))); if (!page) { - pr_err("Failed to allocate compression page\n"); + pr_err("Failed to allocate %s page\n", hib_comp_algo); ret = -ENOMEM; goto out_clean; } data = vzalloc(array_size(nr_threads, sizeof(*data))); if (!data) { - pr_err("Failed to allocate compression data\n"); + pr_err("Failed to allocate %s data\n", hib_comp_algo); ret = -ENOMEM; goto out_clean; } @@ -1221,6 +1259,13 @@ static int load_compressed_image(struct swap_map_handle *handle, init_waitqueue_head(&data[thr].go); init_waitqueue_head(&data[thr].done); + data[thr].cc = crypto_alloc_comp(hib_comp_algo, 0, 0); + if (IS_ERR_OR_NULL(data[thr].cc)) { + pr_err("Could not allocate comp stream %ld\n", PTR_ERR(data[thr].cc)); + ret = -EFAULT; + goto out_clean; + } + data[thr].thr = kthread_run(decompress_threadfn, &data[thr], "image_decompress/%u", thr); @@ -1273,7 +1318,7 @@ static int load_compressed_image(struct swap_map_handle *handle, if (!page[i]) { if (i < CMP_PAGES) { ring_size = i; - pr_err("Failed to allocate compression pages\n"); + pr_err("Failed to allocate %s pages\n", hib_comp_algo); ret = -ENOMEM; goto out_clean; } else { @@ -1283,7 +1328,7 @@ static int load_compressed_image(struct swap_map_handle *handle, } want = ring_size = i; - pr_info("Using %u thread(s) for decompression\n", nr_threads); + pr_info("Using %u thread(s) for %s decompression\n", nr_threads, hib_comp_algo); pr_info("Loading and decompressing image data (%u pages)...\n", nr_to_read); m = nr_to_read / 10; @@ -1344,8 +1389,8 @@ static int load_compressed_image(struct swap_map_handle *handle, data[thr].cmp_len = *(size_t *)page[pg]; if (unlikely(!data[thr].cmp_len || data[thr].cmp_len > - lzo1x_worst_compress(UNC_SIZE))) { - pr_err("Invalid compressed length\n"); + bytes_worst_compress(UNC_SIZE))) { + pr_err("Invalid %s compressed length\n", hib_comp_algo); ret = -1; goto out_finish; } @@ -1396,14 +1441,14 @@ static int load_compressed_image(struct swap_map_handle *handle, ret = data[thr].ret; if (ret < 0) { - pr_err("decompression failed\n"); + pr_err("%s decompression failed\n", hib_comp_algo); goto out_finish; } if (unlikely(!data[thr].unc_len || data[thr].unc_len > UNC_SIZE || data[thr].unc_len & (PAGE_SIZE - 1))) { - pr_err("Invalid uncompressed length\n"); + pr_err("Invalid %s uncompressed length\n", hib_comp_algo); ret = -1; goto out_finish; } @@ -1464,9 +1509,12 @@ out_clean: kfree(crc); } if (data) { - for (thr = 0; thr < nr_threads; thr++) + for (thr = 0; thr < nr_threads; thr++) { if (data[thr].thr) kthread_stop(data[thr].thr); + if (data[thr].cc) + crypto_free_comp(data[thr].cc); + } vfree(data); } vfree(page); @@ -1535,6 +1583,7 @@ int swsusp_check(bool exclusive) if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 
10)) { memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); + swsusp_header_flags = swsusp_header->flags; /* Reset swap signature now */ error = hib_submit_io(REQ_OP_WRITE | REQ_SYNC, swsusp_resume_block, -- cgit v1.2.3 From 8bc29736357e7f9a6bd0d16b57b5612197e1924b Mon Sep 17 00:00:00 2001 From: Nikhil V Date: Mon, 22 Jan 2024 18:45:27 +0530 Subject: PM: hibernate: Add support for LZ4 compression for hibernation Extend the support for LZ4 compression to be used with hibernation. The main idea is that different compression algorithms have different characteristics and hibernation may benefit when it uses any of these algorithms: a default algorithm that achieves a higher compression ratio but is slower (compression/decompression), and a secondary algorithm that is faster (compression/decompression) but has a lower compression ratio. The LZ4 algorithm has better decompression speed than LZO, which reduces the hibernation image restore time. As per test results:

                                    LZO           LZ4
  Size before Compression (bytes)   682696704     682393600
  Size after Compression (bytes)    146502402     155993547
  Decompression Rate                335.02 MB/s   501.05 MB/s
  Restore time                      4.4s          3.8s

LZO is the default compression algorithm used for hibernation. Enable CONFIG_HIBERNATION_COMP_LZ4 to set the default compressor to LZ4. Signed-off-by: Nikhil V Signed-off-by: Rafael J. Wysocki --- kernel/power/Kconfig | 5 +++++ kernel/power/hibernate.c | 25 ++++++++++++++++++++++--- kernel/power/power.h | 14 ++++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index d4167159bae8..afce8130d8b9 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -101,11 +101,16 @@ config HIBERNATION_COMP_LZO bool "lzo" depends on CRYPTO_LZO +config HIBERNATION_COMP_LZ4 + bool "lz4" + depends on CRYPTO_LZ4 + endchoice config HIBERNATION_DEF_COMP string default "lzo" if HIBERNATION_COMP_LZO + default "lz4" if HIBERNATION_COMP_LZ4 help Default compressor to be used for hibernation. diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 76b7ff619c90..219191d6d0e8 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -727,6 +727,9 @@ static int load_image_and_restore(void) return error; } +#define COMPRESSION_ALGO_LZO "lzo" +#define COMPRESSION_ALGO_LZ4 "lz4" + /** * hibernate - Carry out system hibernation, including saving the image. */ @@ -786,11 +789,24 @@ int hibernate(void) if (hibernation_mode == HIBERNATION_PLATFORM) flags |= SF_PLATFORM_MODE; - if (nocompress) + if (nocompress) { flags |= SF_NOCOMPRESS_MODE; - else + } else { flags |= SF_CRC32_MODE; + /* + * By default, LZO compression is enabled. Use SF_COMPRESSION_ALG_LZ4 + * to override this behaviour and use LZ4. + * + * Refer to kernel/power/power.h for more details. + */ + + if (!strcmp(hib_comp_algo, COMPRESSION_ALGO_LZ4)) + flags |= SF_COMPRESSION_ALG_LZ4; + else + flags |= SF_COMPRESSION_ALG_LZO; + } + pm_pr_dbg("Writing hibernation image.\n"); error = swsusp_write(flags); swsusp_free(); @@ -980,7 +996,10 @@ static int software_resume(void) * the algorithm support. 
+ */ if (!(swsusp_header_flags & SF_NOCOMPRESS_MODE)) { - strscpy(hib_comp_algo, default_compressor, sizeof(hib_comp_algo)); + if (swsusp_header_flags & SF_COMPRESSION_ALG_LZ4) + strscpy(hib_comp_algo, COMPRESSION_ALGO_LZ4, sizeof(hib_comp_algo)); + else + strscpy(hib_comp_algo, default_compressor, sizeof(hib_comp_algo)); if (crypto_has_comp(hib_comp_algo, 0, 0) != 1) { pr_err("%s compression is not available\n", hib_comp_algo); error = -EOPNOTSUPP; goto Unlock; } } diff --git a/kernel/power/power.h b/kernel/power/power.h index 5efa2c987057..518349272848 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -167,11 +167,25 @@ extern int swsusp_swap_in_use(void); * Flags that can be passed from the hibernating kernel to the "boot" kernel in * the image header. */ +#define SF_COMPRESSION_ALG_LZO 0 /* dummy, details given below */ #define SF_PLATFORM_MODE 1 #define SF_NOCOMPRESS_MODE 2 #define SF_CRC32_MODE 4 #define SF_HW_SIG 8 +/* + * Bit to indicate the compression algorithm to be used (for LZ4). The same + * could be checked while saving/loading the image to/from disk to use the + * corresponding algorithms. + * + * By default, LZO compression is enabled if SF_CRC32_MODE is set. Use + * SF_COMPRESSION_ALG_LZ4 to override this behaviour and use LZ4. + * + * SF_CRC32_MODE, SF_COMPRESSION_ALG_LZO (dummy) -> Compression, LZO + * SF_CRC32_MODE, SF_COMPRESSION_ALG_LZ4 -> Compression, LZ4 + */ +#define SF_COMPRESSION_ALG_LZ4 16 + /* kernel/power/hibernate.c */ int swsusp_check(bool exclusive); extern void swsusp_free(void); -- cgit v1.2.3 From 4274521fabee05375d10bea0e36a806ed4ab7b45 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:35 +0000 Subject: PM: EM: Add missing newline for the message log Fix a missing newline in the log string in the error code path. Reviewed-by: Hongyan Xia Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. 
Wysocki --- kernel/power/energy_model.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 8b9dd4a39f63..8c373b151875 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -237,15 +237,15 @@ static int em_create_pd(struct device *dev, int nr_states, return 0; } -static void em_cpufreq_update_efficiencies(struct device *dev) +static void +em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table) { struct em_perf_domain *pd = dev->em_pd; - struct em_perf_state *table; struct cpufreq_policy *policy; int found = 0; int i; - if (!_is_cpu_device(dev) || !pd) + if (!_is_cpu_device(dev)) return; policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd))); @@ -254,8 +254,6 @@ static void em_cpufreq_update_efficiencies(struct device *dev) return; } - table = pd->table; - for (i = 0; i < pd->nr_perf_states; i++) { if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT)) continue; @@ -397,7 +395,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, dev->em_pd->flags |= flags; - em_cpufreq_update_efficiencies(dev); + em_cpufreq_update_efficiencies(dev, dev->em_pd->table); em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); -- cgit v1.2.3 From 99907d6054f2d39a625004f9f4e3fe9297838a3c Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:37 +0000 Subject: PM: EM: Find first CPU active while updating OPP efficiency The Energy Model might be updated at runtime and the energy efficiency for each OPP may change. Thus, there is a need to update also the cpufreq framework and make it aligned to the new values. In order to do that, use a first active CPU from the Performance Domain. This is needed since the first CPU in the cpumask might be offline when we run this code path. Reviewed-by: Hongyan Xia Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 8c373b151875..0c3220ff54f7 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -243,12 +243,19 @@ em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table) struct em_perf_domain *pd = dev->em_pd; struct cpufreq_policy *policy; int found = 0; - int i; + int i, cpu; if (!_is_cpu_device(dev)) return; - policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd))); + /* Try to get a CPU which is active and in this PD */ + cpu = cpumask_first_and(em_span_cpus(pd), cpu_active_mask); + if (cpu >= nr_cpu_ids) { + dev_warn(dev, "EM: No online CPU for CPUFreq policy\n"); + return; + } + + policy = cpufreq_cpu_get(cpu); if (!policy) { dev_warn(dev, "EM: Access to CPUFreq policy failed\n"); return; -- cgit v1.2.3 From faf7075b79a259136e2b57ce52b48a7096270e8b Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:39 +0000 Subject: PM: EM: Introduce em_compute_costs() Move the EM costs computation code into a new dedicated function, em_compute_costs(), that can be reused in other places in the future. This change is not expected to alter the general functionality. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. 
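(A worked example of the cost computation being factored out here, with invented numbers: for a non-artificial domain the code computes cost = fmax * power / freq. With fmax = 2000000 kHz, an OPP at 1000000 kHz drawing 200 mW gets cost = 2000000 * 200 / 1000000 = 400, while the top OPP at 2000000 kHz drawing 600 mW gets cost = 600. Walking down from fmax, a state whose cost is greater than or equal to the lowest cost seen at any higher frequency is flagged EM_PERF_STATE_INEFFICIENT, since a higher OPP can serve the same work at no greater energy cost.)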
Wysocki --- kernel/power/energy_model.c | 72 +++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 0c3220ff54f7..5c47caaf270e 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -103,14 +103,52 @@ static void em_debug_create_pd(struct device *dev) {} static void em_debug_remove_pd(struct device *dev) {} #endif +static int em_compute_costs(struct device *dev, struct em_perf_state *table, + struct em_data_callback *cb, int nr_states, + unsigned long flags) +{ + unsigned long prev_cost = ULONG_MAX; + u64 fmax; + int i, ret; + + /* Compute the cost of each performance state. */ + fmax = (u64) table[nr_states - 1].frequency; + for (i = nr_states - 1; i >= 0; i--) { + unsigned long power_res, cost; + + if (flags & EM_PERF_DOMAIN_ARTIFICIAL) { + ret = cb->get_cost(dev, table[i].frequency, &cost); + if (ret || !cost || cost > EM_MAX_POWER) { + dev_err(dev, "EM: invalid cost %lu %d\n", + cost, ret); + return -EINVAL; + } + } else { + power_res = table[i].power; + cost = div64_u64(fmax * power_res, table[i].frequency); + } + + table[i].cost = cost; + + if (table[i].cost >= prev_cost) { + table[i].flags = EM_PERF_STATE_INEFFICIENT; + dev_dbg(dev, "EM: OPP:%lu is inefficient\n", + table[i].frequency); + } else { + prev_cost = table[i].cost; + } + } + + return 0; +} + static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, int nr_states, struct em_data_callback *cb, unsigned long flags) { - unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX; + unsigned long power, freq, prev_freq = 0; struct em_perf_state *table; int i, ret; - u64 fmax; table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL); if (!table) @@ -154,33 +192,9 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, table[i].frequency = prev_freq = freq; } - /* Compute the cost of each performance state. */ - fmax = (u64) table[nr_states - 1].frequency; - for (i = nr_states - 1; i >= 0; i--) { - unsigned long power_res, cost; - - if (flags & EM_PERF_DOMAIN_ARTIFICIAL) { - ret = cb->get_cost(dev, table[i].frequency, &cost); - if (ret || !cost || cost > EM_MAX_POWER) { - dev_err(dev, "EM: invalid cost %lu %d\n", - cost, ret); - goto free_ps_table; - } - } else { - power_res = table[i].power; - cost = div64_u64(fmax * power_res, table[i].frequency); - } - - table[i].cost = cost; - - if (table[i].cost >= prev_cost) { - table[i].flags = EM_PERF_STATE_INEFFICIENT; - dev_dbg(dev, "EM: OPP:%lu is inefficient\n", - table[i].frequency); - } else { - prev_cost = table[i].cost; - } - } + ret = em_compute_costs(dev, table, cb, nr_states, flags); + if (ret) + goto free_ps_table; pd->table = table; pd->nr_perf_states = nr_states; -- cgit v1.2.3 From 818867224d41725dcf4abe890d8f24e5d6bd9c67 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:40 +0000 Subject: PM: EM: Check if the get_cost() callback is present in em_compute_costs() Subsequent changes will introduce a case in which 'cb->get_cost' may not be set in em_compute_costs(), so add a check to ensure that it is not NULL before attempting to dereference it. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. 
Wysocki --- kernel/power/energy_model.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 5c47caaf270e..21d761223255 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -116,7 +116,7 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, for (i = nr_states - 1; i >= 0; i--) { unsigned long power_res, cost; - if (flags & EM_PERF_DOMAIN_ARTIFICIAL) { + if ((flags & EM_PERF_DOMAIN_ARTIFICIAL) && cb->get_cost) { ret = cb->get_cost(dev, table[i].frequency, &cost); if (ret || !cost || cost > EM_MAX_POWER) { dev_err(dev, "EM: invalid cost %lu %d\n", -- cgit v1.2.3 From 8552d6820168d6508bd1f7cd49be248dcb74efb3 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:41 +0000 Subject: PM: EM: Split the allocation and initialization of the EM table Split the process of allocation and data initialization for the EM table. The upcoming changes for modifiable EM will use it. This change is not expected to alter the general functionality. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 55 +++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 21d761223255..7468fa92134b 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -142,18 +142,26 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, return 0; } +static int em_allocate_perf_table(struct em_perf_domain *pd, + int nr_states) +{ + pd->table = kcalloc(nr_states, sizeof(struct em_perf_state), + GFP_KERNEL); + if (!pd->table) + return -ENOMEM; + + return 0; +} + static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, - int nr_states, struct em_data_callback *cb, + struct em_perf_state *table, + struct em_data_callback *cb, unsigned long flags) { unsigned long power, freq, prev_freq = 0; - struct em_perf_state *table; + int nr_states = pd->nr_perf_states; int i, ret; - table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL); - if (!table) - return -ENOMEM; - /* Build the list of performance states for this performance domain */ for (i = 0, freq = 0; i < nr_states; i++, freq++) { /* @@ -165,7 +173,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, if (ret) { dev_err(dev, "EM: invalid perf. 
state: %d\n", ret); - goto free_ps_table; + return -EINVAL; } /* @@ -175,7 +183,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, if (freq <= prev_freq) { dev_err(dev, "EM: non-increasing freq: %lu\n", freq); - goto free_ps_table; + return -EINVAL; } /* @@ -185,7 +193,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, if (!power || power > EM_MAX_POWER) { dev_err(dev, "EM: invalid power: %lu\n", power); - goto free_ps_table; + return -EINVAL; } table[i].power = power; @@ -194,16 +202,9 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, ret = em_compute_costs(dev, table, cb, nr_states, flags); if (ret) - goto free_ps_table; - - pd->table = table; - pd->nr_perf_states = nr_states; + return -EINVAL; return 0; - -free_ps_table: - kfree(table); - return -EINVAL; } static int em_create_pd(struct device *dev, int nr_states, @@ -234,11 +235,15 @@ static int em_create_pd(struct device *dev, int nr_states, return -ENOMEM; } - ret = em_create_perf_table(dev, pd, nr_states, cb, flags); - if (ret) { - kfree(pd); - return ret; - } + pd->nr_perf_states = nr_states; + + ret = em_allocate_perf_table(pd, nr_states); + if (ret) + goto free_pd; + + ret = em_create_perf_table(dev, pd, pd->table, cb, flags); + if (ret) + goto free_pd_table; if (_is_cpu_device(dev)) for_each_cpu(cpu, cpus) { @@ -249,6 +254,12 @@ static int em_create_pd(struct device *dev, int nr_states, return 0; + +free_pd_table: + kfree(pd->table); +free_pd: + kfree(pd); + return -EINVAL; } static void -- cgit v1.2.3 From ca0fc871f16f4bef746b5ba814b67afb59119700 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:42 +0000 Subject: PM: EM: Introduce runtime modifiable table The new runtime table can be populated with new power data to better reflect the actual efficiency of the device, e.g. a CPU. The power can vary over time, e.g. due to SoC temperature changes: higher temperatures can increase the power values. In longer running scenarios, such as games or camera use, when other devices (e.g. the GPU or ISP) are also in use, the CPU power can change. The new EM framework is able to address this issue and change the EM data safely at runtime. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki
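To see what this buys readers of the table, a minimal sketch of the RCU access pattern the design implies is shown below; the em_read_first_power() wrapper is hypothetical, and later patches in this series add the em_perf_state_from_pd() accessor that kernel code actually uses:

#include <linux/energy_model.h>
#include <linux/rcupdate.h>

/*
 * Hypothetical reader: fetch one value from the runtime EM table under
 * RCU protection, so that a concurrent update cannot free the table
 * while it is being read.
 */
static unsigned long em_read_first_power(struct em_perf_domain *pd)
{
	struct em_perf_table *table;
	unsigned long power;

	rcu_read_lock();
	table = rcu_dereference(pd->em_table);
	power = table->state[0].power;
	rcu_read_unlock();

	return power;
}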
--- kernel/power/energy_model.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 7468fa92134b..131ff1d0dc5b 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -23,6 +23,9 @@ */ static DEFINE_MUTEX(em_pd_mutex); +static void em_cpufreq_update_efficiencies(struct device *dev, + struct em_perf_state *table); + static bool _is_cpu_device(struct device *dev) { return (dev->bus == &cpu_subsys); @@ -103,6 +106,31 @@ static void em_debug_create_pd(struct device *dev) {} static void em_debug_remove_pd(struct device *dev) {} #endif +static void em_destroy_table_rcu(struct rcu_head *rp) +{ + struct em_perf_table __rcu *table; + + table = container_of(rp, struct em_perf_table, rcu); + kfree(table); +} + +static void em_free_table(struct em_perf_table __rcu *table) +{ + call_rcu(&table->rcu, em_destroy_table_rcu); +} + +static struct em_perf_table __rcu * +em_allocate_table(struct em_perf_domain *pd) +{ + struct em_perf_table __rcu *table; + int table_size; + + table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; + + table = kzalloc(sizeof(*table) + table_size, GFP_KERNEL); + return table; +} + static int em_compute_costs(struct device *dev, struct em_perf_state *table, struct em_data_callback *cb, int nr_states, unsigned long flags) @@ -153,6 +181,24 @@ static int em_allocate_perf_table(struct em_perf_domain *pd, return 0; } +static int em_create_runtime_table(struct em_perf_domain *pd) +{ + struct em_perf_table __rcu *table; + int table_size; + + table = em_allocate_table(pd); + if (!table) + return -ENOMEM; + + /* Initialize runtime table with existing data */ + table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; + memcpy(table->state, pd->table, table_size); + + rcu_assign_pointer(pd->em_table, table); + + return 0; +} + static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, struct em_perf_state *table, struct em_data_callback *cb, @@ -245,6 +291,10 @@ static int em_create_pd(struct device *dev, int nr_states, if (ret) goto free_pd_table; + ret = em_create_runtime_table(pd); + if (ret) + goto free_pd_table; + if (_is_cpu_device(dev)) for_each_cpu(cpu, cpus) { cpu_dev = get_cpu_device(cpu); @@ -461,6 +511,9 @@ void em_dev_unregister_perf_domain(struct device *dev) em_debug_remove_pd(dev); kfree(dev->em_pd->table); + + em_free_table(dev->em_pd->em_table); + kfree(dev->em_pd); dev->em_pd = NULL; mutex_unlock(&em_pd_mutex); -- cgit v1.2.3 From ffcf9bce7af02a21fb73738999de1e3d4fde5aca Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:44 +0000 Subject: PM: EM: Add functions for memory allocations for new EM tables The runtime modifiable EM table can be provided by drivers. Create a mechanism which allows device drivers to safely allocate and free the table. The same table can be used by EAS in task scheduler code paths, so make sure the memory is not freed when the device driver module is unloaded. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J.
Wysocki --- kernel/power/energy_model.c | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 131ff1d0dc5b..16795743f969 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -114,13 +114,36 @@ static void em_destroy_table_rcu(struct rcu_head *rp) kfree(table); } -static void em_free_table(struct em_perf_table __rcu *table) +static void em_release_table_kref(struct kref *kref) { + struct em_perf_table __rcu *table; + + /* It was the last owner of this table so we can free */ + table = container_of(kref, struct em_perf_table, kref); + call_rcu(&table->rcu, em_destroy_table_rcu); } -static struct em_perf_table __rcu * -em_allocate_table(struct em_perf_domain *pd) +/** + * em_table_free() - Handles safe free of the EM table when needed + * @table : EM table which is going to be freed + * + * No return values. + */ +void em_table_free(struct em_perf_table __rcu *table) +{ + kref_put(&table->kref, em_release_table_kref); +} + +/** + * em_table_alloc() - Allocate a new EM table + * @pd : EM performance domain for which this must be done + * + * Allocate a new EM table and initialize its kref to indicate that it + * has a user. + * Returns allocated table or NULL. + */ +struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) { struct em_perf_table __rcu *table; int table_size; @@ -128,6 +151,11 @@ em_allocate_table(struct em_perf_domain *pd) table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; table = kzalloc(sizeof(*table) + table_size, GFP_KERNEL); + if (!table) + return NULL; + + kref_init(&table->kref); + return table; } @@ -186,7 +214,7 @@ static int em_create_runtime_table(struct em_perf_domain *pd) struct em_perf_table __rcu *table; int table_size; - table = em_allocate_table(pd); + table = em_table_alloc(pd); if (!table) return -ENOMEM; @@ -512,7 +540,7 @@ void em_dev_unregister_perf_domain(struct device *dev) kfree(dev->em_pd->table); - em_free_table(dev->em_pd->em_table); + em_table_free(dev->em_pd->em_table); kfree(dev->em_pd); dev->em_pd = NULL; -- cgit v1.2.3 From 977230d5d50314f9920d3ee6348773d8babbfb58 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:45 +0000 Subject: PM: EM: Introduce em_dev_update_perf_domain() for EM updates Add API function em_dev_update_perf_domain() which allows the EM to be changed safely. Concurrent updaters are serialized with a mutex and the removal of memory that will not be used any more is carried out with the help of RCU. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 16795743f969..667619b70be7 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -209,6 +209,50 @@ static int em_allocate_perf_table(struct em_perf_domain *pd, return 0; } +/** + * em_dev_update_perf_domain() - Update runtime EM table for a device + * @dev : Device for which the EM is to be updated + * @new_table : The new EM table that is going to be used from now + * + * Update EM runtime modifiable table for the @dev using the provided @table. + * + * This function uses a mutex to serialize writers, so it must not be called + * from a non-sleeping context. 
+ * + * Return 0 on success or an error code on failure. + */ +int em_dev_update_perf_domain(struct device *dev, + struct em_perf_table __rcu *new_table) +{ + struct em_perf_table __rcu *old_table; + struct em_perf_domain *pd; + + if (!dev) + return -EINVAL; + + /* Serialize update/unregister or concurrent updates */ + mutex_lock(&em_pd_mutex); + + if (!dev->em_pd) { + mutex_unlock(&em_pd_mutex); + return -EINVAL; + } + pd = dev->em_pd; + + kref_get(&new_table->kref); + + old_table = pd->em_table; + rcu_assign_pointer(pd->em_table, new_table); + + em_cpufreq_update_efficiencies(dev, new_table->state); + + em_table_free(old_table); + + mutex_unlock(&em_pd_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(em_dev_update_perf_domain); + static int em_create_runtime_table(struct em_perf_domain *pd) { struct em_perf_table __rcu *table; -- cgit v1.2.3
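Combined with em_table_alloc() and em_table_free() from the previous patch, a driver-side update might look like the sketch below. This is illustrative only: example_update_em() and its placeholder power data are hypothetical, a real updater would also fill the frequency and cost fields, and proper cost recalculation is only exported later in this series as em_dev_compute_costs():

#include <linux/energy_model.h>

static unsigned long example_power(int i)
{
	return 100 * (i + 1);	/* placeholder power data */
}

/*
 * Hypothetical updater: allocate a fresh table (the kref already
 * counts this caller), fill it, publish it, then drop this caller's
 * reference; the EM framework keeps its own via kref_get().
 */
static int example_update_em(struct device *dev, struct em_perf_domain *pd)
{
	struct em_perf_table __rcu *new_table;
	int i, ret;

	new_table = em_table_alloc(pd);
	if (!new_table)
		return -ENOMEM;

	for (i = 0; i < pd->nr_perf_states; i++)
		new_table->state[i].power = example_power(i);

	ret = em_dev_update_perf_domain(dev, new_table);

	/* Give up this updater's ownership of the table. */
	em_table_free(new_table);

	return ret;
}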
From 5a367f7b7014af86bd1ac0865a42db55187dbd3c Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:47 +0000 Subject: PM: EM: Add performance field to struct em_perf_state and optimize Performance doesn't scale linearly with frequency, and it may differ between workloads. Some CPUs are designed to be particularly good at certain applications, e.g. image or video processing, while other CPUs excel at different ones. When those different types of CPUs are combined in one SoC, they should be properly modeled so that the Energy Aware Scheduler (EAS) can get the most out of the HW. The Energy Model (EM) provides the power vs. performance curves to the EAS, but assumes that the CPU capacity is fixed and scales linearly with frequency. This patch allows the curve to be adjusted on the 'performance' axis as well. Code speed optimization: removing map_util_freq() avoids one division and one multiplication operation in the EAS hot code path. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 667619b70be7..41418aa6daa6 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -46,6 +46,7 @@ static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd) debugfs_create_ulong("frequency", 0444, d, &ps->frequency); debugfs_create_ulong("power", 0444, d, &ps->power); debugfs_create_ulong("cost", 0444, d, &ps->cost); + debugfs_create_ulong("performance", 0444, d, &ps->performance); debugfs_create_ulong("inefficient", 0444, d, &ps->flags); } @@ -159,6 +160,30 @@ struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) return table; } +static void em_init_performance(struct device *dev, struct em_perf_domain *pd, + struct em_perf_state *table, int nr_states) +{ + u64 fmax, max_cap; + int i, cpu; + + /* This is needed only for CPUs and EAS skips other devices */ + if (!_is_cpu_device(dev)) + return; + + cpu = cpumask_first(em_span_cpus(pd)); + + /* + * Calculate the performance value for each frequency with + * linear relationship. The final CPU capacity might not be ready at + * boot time, but the EM will be updated a bit later with the correct one. + */ + fmax = (u64) table[nr_states - 1].frequency; + max_cap = (u64) arch_scale_cpu_capacity(cpu); + for (i = 0; i < nr_states; i++) + table[i].performance = div64_u64(max_cap * table[i].frequency, + fmax); +} + static int em_compute_costs(struct device *dev, struct em_perf_state *table, struct em_data_callback *cb, int nr_states, unsigned long flags) @@ -318,6 +343,8 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, table[i].frequency = prev_freq = freq; } + em_init_performance(dev, pd, table, nr_states); + ret = em_compute_costs(dev, table, cb, nr_states, flags); if (ret) return -EINVAL; -- cgit v1.2.3
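The mapping applied by em_init_performance() is a plain linear rescaling of frequency into capacity units. A worked example with assumed numbers (max_cap = 1024, fmax = 2000000 kHz, neither taken from the patch):

#include <linux/math64.h>

/*
 * performance = max_cap * freq / fmax, e.g.:
 *   freq  500000 kHz -> 1024 *  500000 / 2000000 =  256
 *   freq 1000000 kHz -> 1024 * 1000000 / 2000000 =  512
 *   freq 2000000 kHz -> 1024 * 2000000 / 2000000 = 1024
 */
static u64 example_em_performance(u64 max_cap, u64 fmax, u64 freq)
{
	return div64_u64(max_cap * freq, fmax);
}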
From e3f1164fc9ee8430b3a51e400abfa1b67664f538 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:48 +0000 Subject: PM: EM: Support late CPUs booting and capacity adjustment The patch adds the infrastructure needed to handle late CPU boot, which might change the capacity values computed for CPUs booted earlier. With these changes, new CPUs that try to register an EM will trigger the needed re-calculations for the EMs of other CPUs. Thanks to that, the em_perf_state::performance values will be aligned with the CPU capacity information after all CPUs have finished booting and registering their EMs. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 124 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 41418aa6daa6..b192b0ac8c6e 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -25,6 +25,9 @@ static DEFINE_MUTEX(em_pd_mutex); static void em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table); +static void em_check_capacity_update(void); +static void em_update_workfn(struct work_struct *work); +static DECLARE_DELAYED_WORK(em_update_work, em_update_workfn); static bool _is_cpu_device(struct device *dev) { @@ -583,6 +586,10 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, unlock: mutex_unlock(&em_pd_mutex); + + if (_is_cpu_device(dev)) + em_check_capacity_update(); + return ret; } EXPORT_SYMBOL_GPL(em_dev_register_perf_domain); @@ -618,3 +625,120 @@ void em_dev_unregister_perf_domain(struct device *dev) mutex_unlock(&em_pd_mutex); } EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain); + +/* + * Adjustment of CPU performance values after boot, when all CPUs capacities + * are correctly calculated. + */ +static void em_adjust_new_capacity(struct device *dev, + struct em_perf_domain *pd, + u64 max_cap) +{ + struct em_perf_table __rcu *em_table; + struct em_perf_state *ps, *new_ps; + int ret, ps_size; + + em_table = em_table_alloc(pd); + if (!em_table) { + dev_warn(dev, "EM: allocation failed\n"); + return; + } + + new_ps = em_table->state; + + rcu_read_lock(); + ps = em_perf_state_from_pd(pd); + /* Initialize data based on old table */ + ps_size = sizeof(struct em_perf_state) * pd->nr_perf_states; + memcpy(new_ps, ps, ps_size); + + rcu_read_unlock(); + + em_init_performance(dev, pd, new_ps, pd->nr_perf_states); + ret = em_compute_costs(dev, new_ps, NULL, pd->nr_perf_states, + pd->flags); + if (ret) { + dev_warn(dev, "EM: compute costs failed\n"); + return; + } + + ret = em_dev_update_perf_domain(dev, em_table); + if (ret) + dev_warn(dev, "EM: update failed %d\n", ret); + + /* + * This is a one-time update, so give up the ownership in this updater. + * The EM framework has incremented the usage counter and from now + * will keep the reference (then free the memory when needed). + */ + em_table_free(em_table); +} + +static void em_check_capacity_update(void) +{ + cpumask_var_t cpu_done_mask; + struct em_perf_state *table; + struct em_perf_domain *pd; + unsigned long cpu_capacity; + int cpu; + + if (!zalloc_cpumask_var(&cpu_done_mask, GFP_KERNEL)) { + pr_warn("no free memory\n"); + return; + } + + /* Check if the CPU capacity has changed, then update the EM */ + for_each_possible_cpu(cpu) { + struct cpufreq_policy *policy; + unsigned long em_max_perf; + struct device *dev; + int nr_states; + + if (cpumask_test_cpu(cpu, cpu_done_mask)) + continue; + + policy = cpufreq_cpu_get(cpu); + if (!policy) { + pr_debug("Accessing cpu%d policy failed\n", cpu); + schedule_delayed_work(&em_update_work, + msecs_to_jiffies(1000)); + break; + } + cpufreq_cpu_put(policy); + + pd = em_cpu_get(cpu); + if (!pd || em_is_artificial(pd)) + continue; + + cpumask_or(cpu_done_mask, cpu_done_mask, + em_span_cpus(pd)); + + nr_states = pd->nr_perf_states; + cpu_capacity = arch_scale_cpu_capacity(cpu); + + rcu_read_lock(); + table = em_perf_state_from_pd(pd); + em_max_perf = table[pd->nr_perf_states - 1].performance; + rcu_read_unlock(); + + /* + * Check if the CPU capacity has been adjusted during boot + * and trigger the update for new performance values. + */ + if (em_max_perf == cpu_capacity) + continue; + + pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n", + cpu, cpu_capacity, em_max_perf); + + dev = get_cpu_device(cpu); + em_adjust_new_capacity(dev, pd, cpu_capacity); + } + + free_cpumask_var(cpu_done_mask); +} + +static void em_update_workfn(struct work_struct *work) +{ + em_check_capacity_update(); +} -- cgit v1.2.3
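The decision at the heart of em_check_capacity_update() can be distilled into the sketch below; the em_needs_capacity_update() predicate is hypothetical and omits the cpufreq-readiness retry and artificial-EM filtering handled by the real function:

#include <linux/energy_model.h>
#include <linux/rcupdate.h>
#include <linux/sched/topology.h>

/* Hypothetical predicate: does this CPU's EM still match its capacity? */
static bool em_needs_capacity_update(struct em_perf_domain *pd, int cpu)
{
	struct em_perf_state *table;
	unsigned long em_max_perf;

	rcu_read_lock();
	table = em_perf_state_from_pd(pd);
	em_max_perf = table[pd->nr_perf_states - 1].performance;
	rcu_read_unlock();

	/* A mismatch means the capacity changed after EM registration. */
	return em_max_perf != arch_scale_cpu_capacity(cpu);
}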
From 1b600da510735a0f92c8b4140a7e2cb037a6a6c3 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:49 +0000 Subject: PM: EM: Optimize em_cpu_energy() and remove division The Energy Model (EM) can be modified at runtime, which brings new possibilities. em_cpu_energy() is called by the Energy Aware Scheduler (EAS) in its hot path. The energy calculation uses the power value for a given performance state (ps) and the CPU busy time as a percentage of that given frequency. It is possible to avoid the division by 'scale_cpu' at runtime, because the EM is updated whenever a new max capacity CPU is set in the system. Use that feature and do the needed division during the calculation of the coefficient 'ps->cost'. That enhanced 'ps->cost' value can then simply be multiplied by the utilization: pd_nrg = ps->cost * \Sum cpu_util to get the needed energy for the whole Performance Domain (PD). With this optimization and the earlier removal of map_util_freq(), em_cpu_energy() should run faster by 1.43x on the big CPU and by 1.69x on the little CPU (RockPi 4B board). Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index b192b0ac8c6e..a631d7d52c40 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -192,11 +192,9 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, unsigned long flags) { unsigned long prev_cost = ULONG_MAX; - u64 fmax; int i, ret; /* Compute the cost of each performance state. */ - fmax = (u64) table[nr_states - 1].frequency; for (i = nr_states - 1; i >= 0; i--) { unsigned long power_res, cost; @@ -208,8 +206,9 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, return -EINVAL; } } else { - power_res = table[i].power; - cost = div64_u64(fmax * power_res, table[i].frequency); + /* increase resolution of 'cost' precision */ + power_res = table[i].power * 10; + cost = power_res / table[i].performance; } table[i].cost = cost; -- cgit v1.2.3
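A worked example of the new cost coefficient, using made-up numbers rather than anything from the patch:

/*
 * Setup time, once per table update (em_compute_costs):
 *   cost = 10 * power / performance = 10 * 400 / 512 = 7
 *
 * EAS hot path, on every scheduling decision (em_cpu_energy):
 *   pd_nrg = cost * sum_util, e.g. 7 * 300 = 2100 (abstract units)
 *
 * The old hot path also divided by 'scale_cpu' on every call; after
 * this change only the multiplication remains.
 */
static inline unsigned long example_pd_energy(unsigned long cost,
					      unsigned long sum_util)
{
	return cost * sum_util;
}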
From 09417e673cbd578a1eaf8aba34a668119622d79c Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:54 +0000 Subject: PM: EM: Change debugfs configuration to use runtime EM table data Dump the runtime EM table values, which can be modified over time. In order to do that, allocate a chunk of debug memory which can later be freed automatically thanks to devm_kcalloc(). This design can handle the fact that the EM table memory can change after an EM update, so the debug code cannot use the pointer from the initialization phase. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 67 +++++++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index a631d7d52c40..548908e686ed 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -37,20 +37,65 @@ static bool _is_cpu_device(struct device *dev) #ifdef CONFIG_DEBUG_FS static struct dentry *rootdir; -static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd) +struct em_dbg_info { + struct em_perf_domain *pd; + int ps_id; +}; + +#define DEFINE_EM_DBG_SHOW(name, fname) \ +static int em_debug_##fname##_show(struct seq_file *s, void *unused) \ +{ \ + struct em_dbg_info *em_dbg = s->private; \ + struct em_perf_state *table; \ + unsigned long val; \ + \ + rcu_read_lock(); \ + table = em_perf_state_from_pd(em_dbg->pd); \ + val = table[em_dbg->ps_id].name; \ + rcu_read_unlock(); \ + \ + seq_printf(s, "%lu\n", val); \ + return 0; \ +} \ +DEFINE_SHOW_ATTRIBUTE(em_debug_##fname)

+DEFINE_EM_DBG_SHOW(frequency, frequency); +DEFINE_EM_DBG_SHOW(power, power); +DEFINE_EM_DBG_SHOW(cost, cost); +DEFINE_EM_DBG_SHOW(performance, performance); +DEFINE_EM_DBG_SHOW(flags, inefficiency); + +static void em_debug_create_ps(struct em_perf_domain *em_pd, + struct em_dbg_info *em_dbg, int i, + struct dentry *pd) { + struct em_perf_state *table; + unsigned long freq; struct dentry *d; char name[24]; - snprintf(name, sizeof(name), "ps:%lu", ps->frequency); + em_dbg[i].pd = em_pd; + em_dbg[i].ps_id = i; + + rcu_read_lock(); + table = em_perf_state_from_pd(em_pd); + freq = table[i].frequency; + rcu_read_unlock(); + + snprintf(name, sizeof(name), "ps:%lu", freq); /* Create per-ps directory */ d = debugfs_create_dir(name, pd); - debugfs_create_ulong("frequency", 0444, d, &ps->frequency); - debugfs_create_ulong("power", 0444, d, &ps->power); - debugfs_create_ulong("cost", 0444, d, &ps->cost); - debugfs_create_ulong("performance", 0444, d, &ps->performance); - debugfs_create_ulong("inefficient", 0444, d, &ps->flags); + debugfs_create_file("frequency", 0444, d, &em_dbg[i], + &em_debug_frequency_fops); + debugfs_create_file("power", 0444, d, &em_dbg[i], + &em_debug_power_fops); + debugfs_create_file("cost", 0444, d, &em_dbg[i], + &em_debug_cost_fops); + debugfs_create_file("performance", 0444, d, &em_dbg[i], + &em_debug_performance_fops); + debugfs_create_file("inefficient", 0444, d, &em_dbg[i], + &em_debug_inefficiency_fops); } static int em_debug_cpus_show(struct seq_file *s, void *unused) @@ -73,6 +118,7 @@ DEFINE_SHOW_ATTRIBUTE(em_debug_flags); static void em_debug_create_pd(struct device *dev) { + struct em_dbg_info *em_dbg; struct dentry *d; int i; @@ -86,9 +132,14 @@ static void em_debug_create_pd(struct device *dev) debugfs_create_file("flags", 0444, d, dev->em_pd, &em_debug_flags_fops); + em_dbg = devm_kcalloc(dev, dev->em_pd->nr_perf_states, + sizeof(*em_dbg), GFP_KERNEL); + if (!em_dbg) + return; + /* Create a sub-directory for each performance state */ for (i = 0; i < dev->em_pd->nr_perf_states; i++) - em_debug_create_ps(&dev->em_pd->table[i], d); + em_debug_create_ps(dev->em_pd, em_dbg, i, d); } -- cgit v1.2.3 From 24e9fb635df2790eccb0e95ff65c6dee7a97fcb7 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:55 +0000 Subject: PM: EM: Remove old table Remove the old EM table, whose data could not be modified. Clean up the now unneeded functions and refactor the code a bit. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 46 +++++++-------------------------------------- 1 file changed, 7 insertions(+), 39 deletions(-) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 548908e686ed..57838d28af85 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -276,17 +276,6 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, return 0; } -static int em_allocate_perf_table(struct em_perf_domain *pd, - int nr_states) -{ - pd->table = kcalloc(nr_states, sizeof(struct em_perf_state), - GFP_KERNEL); - if (!pd->table) - return -ENOMEM; - - return 0; -} - /** * em_dev_update_perf_domain() - Update runtime EM table for a device * @dev : Device for which the EM is to be updated @@ -331,24 +320,6 @@ int em_dev_update_perf_domain(struct device *dev, } EXPORT_SYMBOL_GPL(em_dev_update_perf_domain); -static int em_create_runtime_table(struct em_perf_domain *pd) -{ - struct em_perf_table __rcu *table; - int table_size; - - table = em_table_alloc(pd); - if (!table) - return -ENOMEM; - - /* Initialize runtime table with existing data */ - table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; - memcpy(table->state, pd->table, table_size); - - rcu_assign_pointer(pd->em_table, table); - - return 0; -} - static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, struct em_perf_state *table, struct em_data_callback *cb, @@ -409,6 +380,7 @@ static int em_create_pd(struct device *dev, int nr_states, struct em_data_callback *cb, cpumask_t *cpus, unsigned long flags) { + struct em_perf_table __rcu *em_table; struct em_perf_domain *pd; struct device *cpu_dev; int cpu, ret, num_cpus; @@ -435,17 +407,15 @@ pd->nr_perf_states = nr_states; - ret = em_allocate_perf_table(pd, nr_states); - if (ret) + em_table = em_table_alloc(pd); + if (!em_table) goto free_pd; - ret = em_create_perf_table(dev, pd, pd->table, cb, flags); + ret = em_create_perf_table(dev, pd, em_table->state, cb, flags); if (ret) goto free_pd_table; - ret = em_create_runtime_table(pd); - if (ret) - goto free_pd_table; + rcu_assign_pointer(pd->em_table, em_table); if (_is_cpu_device(dev)) for_each_cpu(cpu, cpus) { @@ -458,7 +428,7 @@ static int em_create_pd(struct
device *dev, int nr_states, return 0; free_pd_table: - kfree(pd->table); + kfree(em_table); free_pd: kfree(pd); return -EINVAL; @@ -629,7 +599,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, dev->em_pd->flags |= flags; - em_cpufreq_update_efficiencies(dev, dev->em_pd->table); + em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state); em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); @@ -666,8 +636,6 @@ void em_dev_unregister_perf_domain(struct device *dev) mutex_lock(&em_pd_mutex); em_debug_remove_pd(dev); - kfree(dev->em_pd->table); - em_table_free(dev->em_pd->em_table); kfree(dev->em_pd); -- cgit v1.2.3 From 22ea02848c07d1cbd15a5f442138ca429866300d Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:56 +0000 Subject: PM: EM: Add em_dev_compute_costs() Device drivers can modify the EM at runtime by providing a new EM table. The EM is used by the EAS and em_perf_state::cost stores a pre-calculated value to avoid overhead. This patch provides the API for device drivers to calculate the cost values properly (and not duplicate the same code). Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'kernel') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 57838d28af85..7101fa3fa0c0 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -276,6 +276,24 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, return 0; } +/** + * em_dev_compute_costs() - Calculate cost values for new runtime EM table + * @dev : Device for which the EM table is to be updated + * @table : The new EM table that is going to get the costs calculated + * + * Calculate the em_perf_state::cost values for new runtime EM table. The + * values are used for EAS during task placement. It also calculates and sets + * the efficiency flag for each performance state. When the function finishes + * successfully, the EM table is ready to be updated and used by EAS. + * + * Return 0 on success or a proper error in case of failure. + */ +int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, + int nr_states) +{ + return em_compute_costs(dev, table, NULL, nr_states, 0); +} + /** * em_dev_update_perf_domain() - Update runtime EM table for a device -- cgit v1.2.3 From 3fec6e5961b77af6a952b77f5c2ea26f7513b216 Mon Sep 17 00:00:00 2001 From: Nikhil V Date: Wed, 14 Feb 2024 13:09:32 +0530 Subject: PM: hibernate: Support to select compression algorithm Currently, the default compression algorithm is selected based on compile-time options. Introduce a module parameter "hibernate.compressor" to override this behaviour. Different compression algorithms have different characteristics, and hibernation may benefit from using any of them, especially when a secondary algorithm (LZ4) offers better decompression speeds than the default algorithm (LZO), which in turn reduces the hibernation image restore time. Users can override the default algorithm in two ways: 1) Passing "hibernate.compressor" as a kernel command line parameter. Usage: LZO: hibernate.compressor=lzo LZ4: hibernate.compressor=lz4 2) Specifying the algorithm at runtime.
Usage: LZO: echo lzo > /sys/module/hibernate/parameters/compressor LZ4: echo lz4 > /sys/module/hibernate/parameters/compressor Currently LZO and LZ4 are the supported algorithms. LZO is the default compression algorithm used with hibernation. Signed-off-by: Nikhil V Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 57 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 219191d6d0e8..43b1a82e800c 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -47,7 +47,7 @@ dev_t swsusp_resume_device; sector_t swsusp_resume_block; __visible int in_suspend __nosavedata; -static const char *default_compressor = CONFIG_HIBERNATION_DEF_COMP; +static char hibernate_compressor[CRYPTO_MAX_ALG_NAME] = CONFIG_HIBERNATION_DEF_COMP; /* * Compression/decompression algorithm to be used while saving/loading @@ -748,7 +748,7 @@ int hibernate(void) * Query for the compression algorithm support if compression is enabled. */ if (!nocompress) { - strscpy(hib_comp_algo, default_compressor, sizeof(hib_comp_algo)); + strscpy(hib_comp_algo, hibernate_compressor, sizeof(hib_comp_algo)); if (crypto_has_comp(hib_comp_algo, 0, 0) != 1) { pr_err("%s compression is not available\n", hib_comp_algo); return -EOPNOTSUPP; @@ -999,7 +999,7 @@ static int software_resume(void) if (swsusp_header_flags & SF_COMPRESSION_ALG_LZ4) strscpy(hib_comp_algo, COMPRESSION_ALGO_LZ4, sizeof(hib_comp_algo)); else - strscpy(hib_comp_algo, default_compressor, sizeof(hib_comp_algo)); + strscpy(hib_comp_algo, COMPRESSION_ALGO_LZO, sizeof(hib_comp_algo)); if (crypto_has_comp(hib_comp_algo, 0, 0) != 1) { pr_err("%s compression is not available\n", hib_comp_algo); error = -EOPNOTSUPP; @@ -1422,6 +1422,57 @@ static int __init nohibernate_setup(char *str) return 1; } +static const char * const comp_alg_enabled[] = { +#if IS_ENABLED(CONFIG_CRYPTO_LZO) + COMPRESSION_ALGO_LZO, +#endif +#if IS_ENABLED(CONFIG_CRYPTO_LZ4) + COMPRESSION_ALGO_LZ4, +#endif +}; + +static int hibernate_compressor_param_set(const char *compressor, + const struct kernel_param *kp) +{ + unsigned int sleep_flags; + int index, ret; + + sleep_flags = lock_system_sleep(); + + index = sysfs_match_string(comp_alg_enabled, compressor); + if (index >= 0) { + ret = param_set_copystring(comp_alg_enabled[index], kp); + if (!ret) + strscpy(hib_comp_algo, comp_alg_enabled[index], + sizeof(hib_comp_algo)); + } else { + ret = index; + } + + unlock_system_sleep(sleep_flags); + + if (ret) + pr_debug("Cannot set specified compressor %s\n", + compressor); + + return ret; +} + +static const struct kernel_param_ops hibernate_compressor_param_ops = { + .set = hibernate_compressor_param_set, + .get = param_get_string, +}; + +static struct kparam_string hibernate_compressor_param_string = { + .maxlen = sizeof(hibernate_compressor), + .string = hibernate_compressor, +}; + +module_param_cb(compressor, &hibernate_compressor_param_ops, + &hibernate_compressor_param_string, 0644); +MODULE_PARM_DESC(compressor, + "Compression algorithm to be used with hibernation"); + __setup("noresume", noresume_setup); __setup("resume_offset=", resume_offset_setup); __setup("resume=", resume_setup); -- cgit v1.2.3 From f4311756a83fb01c28a9bf841cbb7eb2b318eebf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 18 Feb 2024 09:40:58 +0100 Subject: PM: hibernate: Don't ignore return from set_memory_ro() set_memory_ro() and set_memory_rw() can fail, leaving 
memory unprotected. Take the returned value into account and abort in case of failure. Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Signed-off-by: Rafael J. Wysocki --- kernel/power/power.h | 2 +- kernel/power/snapshot.c | 25 ++++++++++++++++--------- kernel/power/swap.c | 8 ++++---- kernel/power/user.c | 4 +++- 4 files changed, 24 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/power/power.h b/kernel/power/power.h index 518349272848..de0e6b1077f2 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -153,7 +153,7 @@ extern unsigned int snapshot_additional_pages(struct zone *zone); extern unsigned long snapshot_get_image_size(void); extern int snapshot_read_next(struct snapshot_handle *handle); extern int snapshot_write_next(struct snapshot_handle *handle); -extern void snapshot_write_finalize(struct snapshot_handle *handle); +int snapshot_write_finalize(struct snapshot_handle *handle); extern int snapshot_image_loaded(struct snapshot_handle *handle); extern bool hibernate_acquire(void); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5c96ff067c64..405eddbda4fc 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -58,22 +58,24 @@ static inline void hibernate_restore_protection_end(void) hibernate_restore_protection_active = false; } -static inline void hibernate_restore_protect_page(void *page_address) +static inline int __must_check hibernate_restore_protect_page(void *page_address) { if (hibernate_restore_protection_active) - set_memory_ro((unsigned long)page_address, 1); + return set_memory_ro((unsigned long)page_address, 1); + return 0; } -static inline void hibernate_restore_unprotect_page(void *page_address) +static inline int hibernate_restore_unprotect_page(void *page_address) { if (hibernate_restore_protection_active) - set_memory_rw((unsigned long)page_address, 1); + return set_memory_rw((unsigned long)page_address, 1); + return 0; } #else static inline void hibernate_restore_protection_begin(void) {} static inline void hibernate_restore_protection_end(void) {} -static inline void hibernate_restore_protect_page(void *page_address) {} -static inline void hibernate_restore_unprotect_page(void *page_address) {} +static inline int __must_check hibernate_restore_protect_page(void *page_address) {return 0; } +static inline int hibernate_restore_unprotect_page(void *page_address) {return 0; } #endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */ @@ -2832,7 +2834,9 @@ next: } } else { copy_last_highmem_page(); - hibernate_restore_protect_page(handle->buffer); + error = hibernate_restore_protect_page(handle->buffer); + if (error) + return error; handle->buffer = get_buffer(&orig_bm, &ca); if (IS_ERR(handle->buffer)) return PTR_ERR(handle->buffer); @@ -2858,15 +2862,18 @@ next: * stored in highmem. Additionally, it recycles bitmap memory that's not * necessary any more. 
*/ -void snapshot_write_finalize(struct snapshot_handle *handle) +int snapshot_write_finalize(struct snapshot_handle *handle) { + int error; + copy_last_highmem_page(); - hibernate_restore_protect_page(handle->buffer); + error = hibernate_restore_protect_page(handle->buffer); /* Do that only if we have loaded the image entirely */ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) { memory_bm_recycle(&orig_bm); free_highmem_data(); } + return error; } int snapshot_image_loaded(struct snapshot_handle *handle) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 6513035f2f7f..364342cc7f2d 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -1134,8 +1134,8 @@ static int load_image(struct swap_map_handle *handle, ret = err2; if (!ret) { pr_info("Image loading done\n"); - snapshot_write_finalize(snapshot); - if (!snapshot_image_loaded(snapshot)) + ret = snapshot_write_finalize(snapshot); + if (!ret && !snapshot_image_loaded(snapshot)) ret = -ENODATA; } swsusp_show_speed(start, stop, nr_to_read, "Read"); @@ -1486,8 +1486,8 @@ out_finish: stop = ktime_get(); if (!ret) { pr_info("Image loading done\n"); - snapshot_write_finalize(snapshot); - if (!snapshot_image_loaded(snapshot)) + ret = snapshot_write_finalize(snapshot); + if (!ret && !snapshot_image_loaded(snapshot)) ret = -ENODATA; if (!ret) { if (swsusp_header->flags & SF_CRC32_MODE) { diff --git a/kernel/power/user.c b/kernel/power/user.c index 3a4e70366f35..3aa41ba22129 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -317,7 +317,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; case SNAPSHOT_ATOMIC_RESTORE: - snapshot_write_finalize(&data->handle); + error = snapshot_write_finalize(&data->handle); + if (error) + break; if (data->mode != O_WRONLY || !data->frozen || !snapshot_image_loaded(&data->handle)) { error = -EPERM; -- cgit v1.2.3
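The caller-side contract this change introduces can be condensed into a short sketch; example_finish_restore() is hypothetical and mirrors the swap.c hunks above:

static int example_finish_restore(struct snapshot_handle *snapshot)
{
	int error;

	/* Propagate protection failures before trusting the image. */
	error = snapshot_write_finalize(snapshot);
	if (!error && !snapshot_image_loaded(snapshot))
		error = -ENODATA;	/* image incomplete */

	return error;
}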
It also calculates and sets @@ -728,7 +729,6 @@ static void em_check_capacity_update(void) struct cpufreq_policy *policy; unsigned long em_max_perf; struct device *dev; - int nr_states; if (cpumask_test_cpu(cpu, cpu_done_mask)) continue; @@ -749,7 +749,6 @@ static void em_check_capacity_update(void) cpumask_or(cpu_done_mask, cpu_done_mask, em_span_cpus(pd)); - nr_states = pd->nr_perf_states; cpu_capacity = arch_scale_cpu_capacity(cpu); rcu_read_lock(); -- cgit v1.2.3 From 9bc4ffd32ef8943f5c5a42c9637cfd04771d021b Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Thu, 29 Feb 2024 12:14:59 +0530 Subject: PM: suspend: Set mem_sleep_current during kernel command line setup psci_init_system_suspend() invokes suspend_set_ops() very early during bootup even before kernel command line for mem_sleep_default is setup. This leads to kernel command line mem_sleep_default=s2idle not working as mem_sleep_current gets changed to deep via suspend_set_ops() and never changes back to s2idle. Set mem_sleep_current along with mem_sleep_default during kernel command line setup as default suspend mode. Fixes: faf7ec4a92c0 ("drivers: firmware: psci: add system suspend support") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Maulik Shah Signed-off-by: Rafael J. Wysocki --- kernel/power/suspend.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 742eb26618cc..e3ae93bbcb9b 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -192,6 +192,7 @@ static int __init mem_sleep_default_setup(char *str) if (mem_sleep_labels[state] && !strcmp(str, mem_sleep_labels[state])) { mem_sleep_default = state; + mem_sleep_current = state; break; } -- cgit v1.2.3