From 27e68c4b0d5a945d975140315332ea2e7aa2c57b Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Thu, 7 Sep 2023 02:29:26 +0000
Subject: mm/damon/core: fix a comment about damon_set_attrs() call timings

The comment on damon_set_attrs() says it should not be called while the
kdamond is running, but now some DAMON modules like the sysfs interface
and DAMON_RECLAIM call it from the after_aggregation() and/or
after_wmarks_check() callbacks for online tuning.  Update the comment.

Link: https://lkml.kernel.org/r/20230907022929.91361-9-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Jonathan Corbet
Cc: Steven Rostedt (Google)
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index bcd2bd9d6c10..9160a0674aff 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -541,7 +541,11 @@ static void damon_update_monitoring_results(struct damon_ctx *ctx,
  * @ctx:	monitoring context
  * @attrs:	monitoring attributes
  *
- * This function should not be called while the kdamond is running.
+ * This function should be called while the kdamond is not running, or an
+ * access check results aggregation is not ongoing (e.g., from
+ * &struct damon_callback->after_aggregation or
+ * &struct damon_callback->after_wmarks_check callbacks).
+ *
  * Every time interval is in micro-seconds.
  *
  * Return: 0 on success, negative error code otherwise.
-- cgit v1.2.3


From 2d00946bd7f4e8c17cbd2fce5fd7c3ab58046dff Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Thu, 7 Sep 2023 02:29:29 +0000
Subject: mm/damon/core: remove 'struct target *' parameter from damon_aggregated tracepoint

The damon_aggregated tracepoint receives a 'struct target *', but doesn't
use it.  Remove it from the prototype.

Link: https://lkml.kernel.org/r/20230907022929.91361-12-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Jonathan Corbet
Cc: Steven Rostedt (Google)
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index 9160a0674aff..ca631dd88b33 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -776,7 +776,7 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
 		struct damon_region *r;

 		damon_for_each_region(r, t) {
-			trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
+			trace_damon_aggregated(ti, r, damon_nr_regions(t));
 			r->last_nr_accesses = r->nr_accesses;
 			r->nr_accesses = 0;
 		}
-- cgit v1.2.3


From c603c630b509d690d470b9b49eb96f40482f47d5 Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Wed, 13 Sep 2023 02:20:49 +0000
Subject: mm/damon/core: add a tracepoint for damos apply target regions

Patch series "mm/damon: add a tracepoint for damos apply target regions",
v2.

DAMON provides the damon_aggregated tracepoint to let users record full
monitoring results.  Sometimes, users need to record monitoring results
of a specific pattern.  The DAMOS tried regions directory of the DAMON
sysfs interface allows it, but the interface is mainly designed for
snapshots and therefore would be inefficient for such recording.
Implement yet another tracepoint for efficient support of the use case.

This patch (of 2):

DAMON provides the damon_aggregated tracepoint, which exposes details of
each region and its access monitoring results.  It is useful for getting
the whole monitoring results, e.g., for recording purposes.

For investigations of DAMOS, the DAMON sysfs interface provides DAMOS
statistics and the tried_regions directory.
But those provide only statistics and snapshots.  If the scheme is
frequently applied and the user needs to know every detail of DAMOS
behavior, the snapshot-based interface could be insufficient and
expensive.  As a last resort, userspace users need to record all the
monitoring results via the damon_aggregated tracepoint and simulate how
DAMOS would have worked.  That is unnecessarily complicated.  DAMON
kernel API users, meanwhile, can easily do that via the
before_damos_apply() callback field of 'struct damon_callback'.

Add a tracepoint that will be called just after the before_damos_apply()
callback for more convenient investigations of DAMOS.  The tracepoint
exposes all details about each region, similar to the damon_aggregated
tracepoint.

Please note that DAMOS is currently used not only for memory management
but also for query-like efficient retrieval of monitoring results (when
the 'stat' action is used).  Until now, only statistics or snapshots were
supported.  Addition of this tracepoint allows efficient full recording
of DAMOS-based filtered monitoring results.

Link: https://lkml.kernel.org/r/20230913022050.2109-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20230913022050.2109-2-sj@kernel.org
Signed-off-by: SeongJae Park
Reviewed-by: Steven Rostedt (Google) [tracing]
Cc: Jonathan Corbet
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index ca631dd88b33..3ca34a252a3c 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -950,6 +950,33 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
 	struct timespec64 begin, end;
 	unsigned long sz_applied = 0;
 	int err = 0;
+	/*
+	 * We plan to support multiple context per kdamond, as DAMON sysfs
+	 * implies with 'nr_contexts' file.  Nevertheless, only single context
+	 * per kdamond is supported for now.  So, we can simply use '0' context
+	 * index here.
+	 */
+	unsigned int cidx = 0;
+	struct damos *siter;		/* schemes iterator */
+	unsigned int sidx = 0;
+	struct damon_target *titer;	/* targets iterator */
+	unsigned int tidx = 0;
+	bool do_trace = false;
+
+	/* get indices for trace_damos_before_apply() */
+	if (trace_damos_before_apply_enabled()) {
+		damon_for_each_scheme(siter, c) {
+			if (siter == s)
+				break;
+			sidx++;
+		}
+		damon_for_each_target(titer, c) {
+			if (titer == t)
+				break;
+			tidx++;
+		}
+		do_trace = true;
+	}

 	if (c->ops.apply_scheme) {
 		if (quota->esz && quota->charged_sz + sz > quota->esz) {
@@ -964,8 +991,11 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
 		ktime_get_coarse_ts64(&begin);
 		if (c->callback.before_damos_apply)
 			err = c->callback.before_damos_apply(c, t, r, s);
-		if (!err)
+		if (!err) {
+			trace_damos_before_apply(cidx, sidx, tidx, r,
+					damon_nr_regions(t), do_trace);
 			sz_applied = c->ops.apply_scheme(c, t, r, s);
+		}
 		ktime_get_coarse_ts64(&end);
 		quota->total_charged_ns += timespec64_to_ns(&end) -
 			timespec64_to_ns(&begin);
-- cgit v1.2.3


From 4472edf63d6630e6cf65e205b4fc8c3c94d0afe5 Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Thu, 14 Sep 2023 02:15:23 +0000
Subject: mm/damon/core: use number of passed access sampling as a timer

DAMON sleeps for the sampling interval after each sampling, and checks
whether the aggregation interval and the ops update interval have passed,
using ktime_get_coarse_ts64() and baseline timestamps for the intervals.
That design is for making the operations occur at deterministic timing
regardless of the time spent for each work.  However, it turned out it is
not that useful, and incurs unintuitive results.  After all, timer
functions, and especially the sleep functions that DAMON uses to wait for
specific timing, are not necessarily strictly accurate.  That is a legal
design, so it is not a problem in itself.  However, depending on such
inaccuracies, nr_accesses can become larger than the aggregation interval
divided by the sampling interval.  For example, with the default setting
(5 ms sampling interval and 100 ms aggregation interval) we frequently
see regions having nr_accesses larger than 20.  Also, if the execution of
a DAMOS scheme takes a long time, the next aggregation could happen
before a sufficient number of samples has been collected.  This is not
what usual users would intuitively expect.

Since access check sampling is the smallest unit of work in DAMON, using
the number of passed sampling intervals as the DAMON-internal timer can
easily avoid these problems.  That is, convert the aggregation and ops
update intervals to the numbers of sampling intervals that need to pass
before those operations are executed, count the number of passed sampling
intervals, and invoke the operations as soon as that number of sampling
intervals has passed.  Make the change.

Note that this could make a behavioral change for settings that use
intervals not aligned to the sampling interval.  For example, if the
sampling interval is 5 ms and the aggregation interval is 12 ms, DAMON
effectively used 15 ms as its aggregation interval, because it checked
the aggregation interval only after sleeping for the sampling interval.
This change will make DAMON effectively use 10 ms as the aggregation
interval, since it uses 'aggregation interval / sampling interval *
sampling interval' as the effective aggregation interval, and we don't
use floating point types.  Usual users would have used aligned intervals,
so this behavioral change is not expected to have any meaningful impact;
just make this change.

Link: https://lkml.kernel.org/r/20230914021523.60649-1-sj@kernel.org
Signed-off-by: SeongJae Park
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 96 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 47 insertions(+), 49 deletions(-)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index 3ca34a252a3c..c5b7296c69a0 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -427,8 +427,10 @@ struct damon_ctx *damon_new_ctx(void)
 	ctx->attrs.aggr_interval = 100 * 1000;
 	ctx->attrs.ops_update_interval = 60 * 1000 * 1000;

-	ktime_get_coarse_ts64(&ctx->last_aggregation);
-	ctx->last_ops_update = ctx->last_aggregation;
+	ctx->passed_sample_intervals = 0;
+	/* These will be set from kdamond_init_intervals_sis() */
+	ctx->next_aggregation_sis = 0;
+	ctx->next_ops_update_sis = 0;

 	mutex_init(&ctx->kdamond_lock);

@@ -552,6 +554,9 @@ static void damon_update_monitoring_results(struct damon_ctx *ctx,
  */
 int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs)
 {
+	unsigned long sample_interval = attrs->sample_interval ?
+ attrs->sample_interval : 1; + if (attrs->min_nr_regions < 3) return -EINVAL; if (attrs->min_nr_regions > attrs->max_nr_regions) @@ -559,6 +564,11 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) if (attrs->sample_interval > attrs->aggr_interval) return -EINVAL; + ctx->next_aggregation_sis = ctx->passed_sample_intervals + + attrs->aggr_interval / sample_interval; + ctx->next_ops_update_sis = ctx->passed_sample_intervals + + attrs->ops_update_interval / sample_interval; + damon_update_monitoring_results(ctx, attrs); ctx->attrs = *attrs; return 0; @@ -732,38 +742,6 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs) return err; } -/* - * damon_check_reset_time_interval() - Check if a time interval is elapsed. - * @baseline: the time to check whether the interval has elapsed since - * @interval: the time interval (microseconds) - * - * See whether the given time interval has passed since the given baseline - * time. If so, it also updates the baseline to current time for next check. - * - * Return: true if the time interval has passed, or false otherwise. - */ -static bool damon_check_reset_time_interval(struct timespec64 *baseline, - unsigned long interval) -{ - struct timespec64 now; - - ktime_get_coarse_ts64(&now); - if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) < - interval * 1000) - return false; - *baseline = now; - return true; -} - -/* - * Check whether it is time to flush the aggregated information - */ -static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx) -{ - return damon_check_reset_time_interval(&ctx->last_aggregation, - ctx->attrs.aggr_interval); -} - /* * Reset the aggregated monitoring results ('nr_accesses' of each region). */ @@ -1274,18 +1252,6 @@ static void kdamond_split_regions(struct damon_ctx *ctx) last_nr_regions = nr_regions; } -/* - * Check whether it is time to check and apply the operations-related data - * structures. - * - * Returns true if it is. - */ -static bool kdamond_need_update_operations(struct damon_ctx *ctx) -{ - return damon_check_reset_time_interval(&ctx->last_ops_update, - ctx->attrs.ops_update_interval); -} - /* * Check whether current monitoring should be stopped * @@ -1397,6 +1363,17 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) return -EBUSY; } +static void kdamond_init_intervals_sis(struct damon_ctx *ctx) +{ + unsigned long sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + + ctx->passed_sample_intervals = 0; + ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval; + ctx->next_ops_update_sis = ctx->attrs.ops_update_interval / + sample_interval; +} + /* * The monitoring daemon that runs as a kernel thread */ @@ -1410,6 +1387,8 @@ static int kdamond_fn(void *data) pr_debug("kdamond (%d) starts\n", current->pid); + kdamond_init_intervals_sis(ctx); + if (ctx->ops.init) ctx->ops.init(ctx); if (ctx->callback.before_start && ctx->callback.before_start(ctx)) @@ -1418,6 +1397,17 @@ static int kdamond_fn(void *data) sz_limit = damon_region_sz_limit(ctx); while (!kdamond_need_stop(ctx)) { + /* + * ctx->attrs and ctx->next_{aggregation,ops_update}_sis could + * be changed from after_wmarks_check() or after_aggregation() + * callbacks. Read the values here, and use those for this + * iteration. That is, damon_set_attrs() updated new values + * are respected from next iteration. 
+		 */
+		unsigned long next_aggregation_sis = ctx->next_aggregation_sis;
+		unsigned long next_ops_update_sis = ctx->next_ops_update_sis;
+		unsigned long sample_interval = ctx->attrs.sample_interval;
+
 		if (kdamond_wait_activation(ctx))
 			break;

@@ -1427,12 +1417,17 @@ static int kdamond_fn(void *data)
 				ctx->callback.after_sampling(ctx))
 			break;

-		kdamond_usleep(ctx->attrs.sample_interval);
+		kdamond_usleep(sample_interval);
+		ctx->passed_sample_intervals++;

 		if (ctx->ops.check_accesses)
 			max_nr_accesses = ctx->ops.check_accesses(ctx);

-		if (kdamond_aggregate_interval_passed(ctx)) {
+		sample_interval = ctx->attrs.sample_interval ?
+			ctx->attrs.sample_interval : 1;
+		if (ctx->passed_sample_intervals == next_aggregation_sis) {
+			ctx->next_aggregation_sis = next_aggregation_sis +
+				ctx->attrs.aggr_interval / sample_interval;
 			kdamond_merge_regions(ctx,
 					max_nr_accesses / 10,
 					sz_limit);
@@ -1447,7 +1442,10 @@ static int kdamond_fn(void *data)
 				ctx->ops.reset_aggregated(ctx);
 		}

-		if (kdamond_need_update_operations(ctx)) {
+		if (ctx->passed_sample_intervals == next_ops_update_sis) {
+			ctx->next_ops_update_sis = next_ops_update_sis +
+				ctx->attrs.ops_update_interval /
+					sample_interval;
 			if (ctx->ops.update)
 				ctx->ops.update(ctx);
 			sz_limit = damon_region_sz_limit(ctx);
-- cgit v1.2.3


From 78fbfb155d204428119310d1b9df665ab88da6e8 Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Fri, 15 Sep 2023 02:52:44 +0000
Subject: mm/damon/core: define and use a dedicated function for region access rate update

Patch series "mm/damon: provide pseudo-moving sum based access rate".

DAMON checks the access to each region for every sampling interval, and
increases the access rate counter of the region, namely nr_accesses, if
an access was made.  For every aggregation interval, the counter is
reset.  The counter is exposed to users to be used as a metric showing
the relative access rate (frequency) of each region.  In other words,
DAMON provides the access rate of each region in every aggregation
interval.  The aggregation keeps temporal access pattern changes from
making things confusing.  However, this also makes a few DAMON-related
operations unnecessarily need to be aligned to the aggregation interval.
This can restrict the flexibility of DAMON applications, especially when
the aggregation interval is huge.

To provide the monitoring results in finer-grained timing while keeping
the handling of temporal access pattern changes, this patchset implements
a pseudo-moving sum based access rate metric.  It is a pseudo-moving sum
because a strict moving sum implementation would need to keep all values
of the last time window, and that could incur high overhead, as there
could be an arbitrary number of values in a time window.  Especially in
the case of nr_accesses, since the sampling interval and aggregation
interval can be arbitrarily set and the past values should be maintained
for every region, it could be risky.  The pseudo-moving sum assumes there
was no temporal access pattern change in the last discrete time window,
to remove the need for keeping the list of the last time window's values.
As a result, it is not a strict moving sum implementation, but it
provides reasonable accuracy.

Also, it keeps an important property of the moving sum.  That is, the
moving sum becomes the same as the discrete-window based sum at times
that align to the time window.  This means using the pseudo-moving sum
based nr_accesses makes no change for users who read the value at every
aggregation interval.

Patches Sequence
----------------

The sequence of the patches is as follows.
The first four patches prepare for the change.  The first two (patches 1
and 2) implement a helper function for the nr_accesses update and
eliminate a corner case that skips use of the function, respectively.
The following two (patches 3 and 4) respectively implement the
pseudo-moving sum function and its simple unit test case.

Two patches for making DAMON use the pseudo-moving sum follow.  The fifth
one (patch 5) introduces a new field for representing the pseudo-moving
sum-based access rate of each region, and the sixth one makes the new
representation actually be updated with the pseudo-moving sum function.

The last two patches (patches 7 and 8) make follow-up fixes for skipping
unnecessary updates and marking the moving sum function as static,
respectively.

This patch (of 8):

Each DAMON operations set updates the nr_accesses field of each
damon_region for each of its access check results, from the
check_accesses() callback.  Directly accessing the field could make
things complex to manage and change in the future.  Define and use a
dedicated function for the purpose.

Link: https://lkml.kernel.org/r/20230915025251.72816-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20230915025251.72816-2-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Brendan Higgins
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index c5b7296c69a0..10532159323a 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1549,6 +1549,22 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t,
 	return damon_set_regions(t, &addr_range, 1);
 }

+/**
+ * damon_update_region_access_rate() - Update the access rate of a region.
+ * @r:		The DAMON region to update for its access check result.
+ * @accessed:	Whether the region has accessed during last sampling interval.
+ *
+ * Update the access rate of a region with the region's last sampling interval
+ * access check result.
+ *
+ * Usually this will be called by &damon_operations->check_accesses callback.
+ */
+void damon_update_region_access_rate(struct damon_region *r, bool accessed)
+{
+	if (accessed)
+		r->nr_accesses++;
+}
+
 static int __init damon_init(void)
 {
 	damon_region_cache = KMEM_CACHE(damon_region, 0);
-- cgit v1.2.3


From d2c062ade07ffd206dd16bf085f02abc59651309 Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Fri, 15 Sep 2023 02:52:46 +0000
Subject: mm/damon/core: implement a pseudo-moving sum function

For values that continuously change, a moving average or sum is a good
way to provide fast updates while handling temporal and erroneous
variability of the value.  For example, the access rate counter
(nr_accesses) is calculated as a sum of the number of positive sampled
access check results collected during a discrete time window (the
aggregation interval); hence it handles temporal and erroneous access
check results, but provides the update only for every aggregation
interval.  Using a moving sum method for that could allow providing the
value for every sampling interval.  That could be useful for getting
monitoring results snapshots or running DAMOS in fine-grained timing.

However, supporting the moving sum for cases where the number of samples
in the time window is arbitrary could impose high overhead, since the
number of past values that it needs to keep could be too high.
nr_accesses would also be one of those cases.
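To see the cost concretely, consider what a strict moving sum would
require: per region, a ring buffer holding the whole window, updated for
every sample.  A minimal sketch follows (illustration only; the names
below are hypothetical and not part of the kernel):

struct strict_moving_sum {
	unsigned int *samples;	/* the last len_window values */
	unsigned int len_window;
	unsigned int pos;	/* index of the oldest value */
	unsigned int sum;
};

/* drop the oldest value, add the new one, and remember it */
static unsigned int strict_moving_sum_update(struct strict_moving_sum *s,
		unsigned int new_value)
{
	s->sum -= s->samples[s->pos];
	s->sum += new_value;
	s->samples[s->pos] = new_value;
	s->pos = (s->pos + 1) % s->len_window;
	return s->sum;
}

Since len_window would be the aggregation interval divided by the
sampling interval, and one such buffer would be needed for every region,
the memory cost scales with both the number of regions and the interval
ratio.  The pseudo-moving sum described next avoids keeping the buffer
entirely.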
To mitigate the overhead, implement a pseudo-moving sum function that
provides only an estimated pseudo-moving sum.  It assumes there was no
error in the last discrete time window and subtracts a constant portion
of the last discrete time window's sum.

Note that the function does not strictly implement the moving sum, but it
keeps a property of the moving sum that makes the value the same as the
discrete-window based sum at each time window-aligned timing.  Hence,
people collecting the value at the old timings would see no difference.

Link: https://lkml.kernel.org/r/20230915025251.72816-4-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Brendan Higgins
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index 10532159323a..b005dc15009f 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1549,6 +1549,46 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t,
 	return damon_set_regions(t, &addr_range, 1);
 }

+/*
+ * damon_moving_sum() - Calculate an inferred moving sum value.
+ * @mvsum:	Inferred sum of the last @len_window values.
+ * @nomvsum:	Non-moving sum of the last discrete @len_window window values.
+ * @len_window:	The number of last values to take care of.
+ * @new_value:	New value that will be added to the pseudo moving sum.
+ *
+ * Moving sum (moving average * window size) is good for handling noise, but
+ * the cost of keeping past values can be high for arbitrary window size.  This
+ * function implements a lightweight pseudo moving sum function that doesn't
+ * keep the past window values.
+ *
+ * It simply assumes there was no noise in the past, and gets the no-noise
+ * assumed past value to drop from @nomvsum and @len_window.  @nomvsum is a
+ * non-moving sum of the last window.  For example, if @len_window is 10 and we
+ * have 25 values, @nomvsum is the sum of the 11th to 20th values of the 25
+ * values.  Hence, this function simply drops @nomvsum / @len_window from the
+ * given @mvsum and adds @new_value.
+ *
+ * For example, if @len_window is 10 and @nomvsum is 50, the last 10 values for
+ * the last window could vary, e.g., 0, 10, 0, 10, 0, 10, 0, 0, 0, 20.  For
+ * calculating the next moving sum with a new value, we should drop 0 from 50
+ * and add the new value.  However, this function assumes it got value 5 for
+ * each of the last ten times.  Based on the assumption, when the next value is
+ * measured, it drops the assumed past value, 5, from the current sum, and adds
+ * the new value to get the updated pseudo-moving average.
+ *
+ * This means the value could have errors, but the errors will disappear for
+ * every @len_window-aligned call.  For example, if @len_window is 10, the
+ * pseudo moving sum with the 11th to 19th values would have an error.  But
+ * the sum with the 20th value will not have the error.
+ *
+ * Return: Pseudo-moving average after getting the @new_value.
+ */
+unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum,
+		unsigned int len_window, unsigned int new_value)
+{
+	return mvsum - nomvsum / len_window + new_value;
+}
+
 /**
  * damon_update_region_access_rate() - Update the access rate of a region.
  * @r:		The DAMON region to update for its access check result.
-- cgit v1.2.3


From 80333828ea7728ebe85d079bb5c1467eb9fc6c8c Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Fri, 15 Sep 2023 02:52:48 +0000
Subject: mm/damon/core: introduce nr_accesses_bp

Add yet another representation of the access rate of each region, namely
nr_accesses_bp.  It is just the same as nr_accesses, but represents the
value in basis points (1 in 10,000), and is updated at once in every
aggregation interval.  That is, nr_accesses_bp is just nr_accesses *
10000.

This may seem useless at the moment.  However, it will be useful for
representing values of less than one nr_accesses, which will be needed to
make the moving sum-based nr_accesses.

Link: https://lkml.kernel.org/r/20230915025251.72816-6-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Brendan Higgins
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index b005dc15009f..ce85c00b0a4c 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -128,6 +128,7 @@ struct damon_region *damon_new_region(unsigned long start, unsigned long end)
 	region->ar.start = start;
 	region->ar.end = end;
 	region->nr_accesses = 0;
+	region->nr_accesses_bp = 0;
 	INIT_LIST_HEAD(&region->list);

 	region->age = 0;
@@ -508,6 +509,7 @@ static void damon_update_monitoring_result(struct damon_region *r,
 {
 	r->nr_accesses = damon_nr_accesses_for_new_attrs(r->nr_accesses,
 			old_attrs, new_attrs);
+	r->nr_accesses_bp = r->nr_accesses * 10000;
 	r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs);
 }

@@ -1115,6 +1117,7 @@ static void damon_merge_two_regions(struct damon_target *t,

 	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
 			(sz_l + sz_r);
+	l->nr_accesses_bp = l->nr_accesses * 10000;
 	l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
 	l->ar.end = r->ar.end;
 	damon_destroy_region(r, t);
@@ -1138,6 +1141,8 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
 		else
 			r->age++;

+		r->nr_accesses_bp = r->nr_accesses * 10000;
+
 		if (prev && prev->ar.end == r->ar.start &&
 		    abs(prev->nr_accesses - r->nr_accesses) <= thres &&
 		    damon_sz_region(prev) + damon_sz_region(r) <= sz_limit)
@@ -1186,6 +1191,7 @@ static void damon_split_region_at(struct damon_target *t,

 	new->age = r->age;
 	new->last_nr_accesses = r->last_nr_accesses;
+	new->nr_accesses_bp = r->nr_accesses_bp;

 	damon_insert_region(new, r, damon_next_region(r), t);
 }
-- cgit v1.2.3


From ace30fb21af5f1be1605db72c16040b95b1557ef Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Fri, 15 Sep 2023 02:52:49 +0000
Subject: mm/damon/core: use pseudo-moving sum for nr_accesses_bp

Let nr_accesses_bp be calculated as a pseudo-moving sum that is updated
for every sampling interval, using damon_moving_sum().  This is assumed
to be useful for cases where the aggregation interval is set quite huge,
but the monitoring results need to be collected before the next
aggregation interval has passed.

Link: https://lkml.kernel.org/r/20230915025251.72816-7-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Brendan Higgins
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index ce85c00b0a4c..29ee1fc18393 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1599,14 +1599,28 @@ unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum,
  * damon_update_region_access_rate() - Update the access rate of a region.
  * @r:		The DAMON region to update for its access check result.
  * @accessed:	Whether the region has accessed during last sampling interval.
+ * @attrs:	The damon_attrs of the DAMON context.
  *
  * Update the access rate of a region with the region's last sampling interval
  * access check result.
  *
  * Usually this will be called by &damon_operations->check_accesses callback.
  */
-void damon_update_region_access_rate(struct damon_region *r, bool accessed)
+void damon_update_region_access_rate(struct damon_region *r, bool accessed,
+		struct damon_attrs *attrs)
 {
+	unsigned int len_window = 1;
+
+	/*
+	 * sample_interval can be zero, but cannot be larger than
+	 * aggr_interval, owing to validation of damon_set_attrs().
+	 */
+	if (attrs->sample_interval)
+		len_window = attrs->aggr_interval / attrs->sample_interval;
+	r->nr_accesses_bp = damon_moving_sum(r->nr_accesses_bp,
+			r->last_nr_accesses * 10000, len_window,
+			accessed ? 10000 : 0);
+
 	if (accessed)
 		r->nr_accesses++;
 }
-- cgit v1.2.3


From 401807a316bb913f5eefcaf8343575ec9296d6b1 Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Fri, 15 Sep 2023 02:52:50 +0000
Subject: mm/damon/core: skip updating nr_accesses_bp for each aggregation interval

damon_merge_regions_of(), which is called for each aggregation interval,
updates nr_accesses_bp to nr_accesses * 10000.  However, nr_accesses_bp
is updated for each sampling interval via damon_moving_sum() using the
aggregation interval as the moving time window.  And by the definition of
the algorithm, the value becomes the same as the discrete-window based
sum at each time window-aligned time.  Hence, nr_accesses_bp will be the
same as nr_accesses * 10000 for each aggregation interval without the
explicit update.

Remove the unnecessary update of nr_accesses_bp in
damon_merge_regions_of().

Link: https://lkml.kernel.org/r/20230915025251.72816-8-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Brendan Higgins
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index 29ee1fc18393..45cc108c0fe1 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1141,8 +1141,6 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
 		else
 			r->age++;

-		r->nr_accesses_bp = r->nr_accesses * 10000;
-
 		if (prev && prev->ar.end == r->ar.start &&
 		    abs(prev->nr_accesses - r->nr_accesses) <= thres &&
 		    damon_sz_region(prev) + damon_sz_region(r) <= sz_limit)
-- cgit v1.2.3


From 863803a7948c8e33e6a7b002017747ca83ecfd63 Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Fri, 15 Sep 2023 02:52:51 +0000
Subject: mm/damon/core: mark damon_moving_sum() as a static function

The function is used only by mm/damon/core.c.  Mark it as a static
function.

Link: https://lkml.kernel.org/r/20230915025251.72816-9-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Brendan Higgins
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index 45cc108c0fe1..b15cf47d2d29 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1587,7 +1587,7 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t,
  *
  * Return: Pseudo-moving average after getting the @new_value.
  */
-unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum,
+static unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum,
 		unsigned int len_window, unsigned int new_value)
 {
 	return mvsum - nomvsum / len_window + new_value;
-- cgit v1.2.3


From affa87c708185cab194099ee51b946ef0297f063 Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Sat, 16 Sep 2023 02:09:37 +0000
Subject: mm/damon/core: make DAMOS uses nr_accesses_bp instead of nr_accesses

Patch series "mm/damon: implement DAMOS apply intervals".

DAMON-based operation schemes are applied for every aggregation interval.
That is mainly because schemes are using nr_accesses, which becomes
complete for use only at every aggregation interval.

This makes some DAMOS use cases tricky.  Quota setting under a long
aggregation interval is one such example.  Suppose the aggregation
interval is ten seconds, and there is a scheme having a CPU quota of
100ms per 1s.  The scheme will actually use 100ms per ten seconds, since
it cannot be applied before the next aggregation interval.  The feature
is working as intended, but the results might not be that intuitive for
some users.  This could be fixed by updating the quota to 1s per 10s.
But in that case, the CPU usage of DAMOS could look like spikes, and
could actually badly affect other CPU-sensitive workloads.  Also, with
such a huge aggregation interval, users may want schemes to be applied
more frequently.

DAMON provides nr_accesses_bp, which is updated for each sampling
interval in a way that is reasonable to use.  By using that instead of
nr_accesses, DAMOS can have its own time interval and mitigate the
above-mentioned issues.

This patchset makes DAMOS schemes use nr_accesses_bp instead of
nr_accesses, and have their own timing intervals.  It also updates the
DAMOS tried regions sysfs files and the DAMOS before_apply tracepoint to
use the new data as their source.  Note that the interval is zero by
default, and it is interpreted as meaning the aggregation interval should
be used instead.  This avoids making user-visible behavioral changes.

Patches Sequence
----------------

The first patch (patch 1/9) makes DAMOS use nr_accesses_bp instead of
nr_accesses, and the following two patches (patches 2/9 and 3/9) update
the DAMON sysfs interface for DAMOS tried regions and the DAMOS
before_apply tracepoint to use nr_accesses_bp instead of nr_accesses,
respectively.  The following two patches (patches 4/9 and 5/9) implement
the scheme-specific apply interval for DAMON kernel API users and update
the design document for the new feature.  Finally, the following four
patches (patches 6/9, 7/9, 8/9 and 9/9) add support for the feature in
the DAMON sysfs interface, add a simple selftest case, and document the
new file in the usage and ABI documents, respectively.

This patch (of 9):

DAMON provides nr_accesses_bp, which becomes the same as nr_accesses *
10000 at every aggregation interval, but is updated every sampling
interval with reasonable accuracy.  Since DAMON-based operation schemes
are applied at every aggregation interval using nr_accesses, using
nr_accesses_bp instead will make no difference to users.  Meanwhile, it
allows DAMOS to apply the schemes at a time interval that is less than
the aggregation interval.  It could be useful and more flexible for some
cases.  Do it.
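For a concrete sense of the equivalence (the numbers here are only an
illustration, using the default 5 ms sampling and 100 ms aggregation
intervals, i.e., a 20-sample window): suppose a region had nr_accesses 10
in the last window, so nr_accesses_bp starts the new window at 100000.
If the region is then accessed in every sample, each sampling interval
updates the value to bp - 100000 / 20 + 10000 = bp + 5000, reaching
200000 == 20 * 10000 at the window boundary, exactly the new nr_accesses
times 10000.  Mid-window, DAMOS already sees intermediate values such as
150000 (15 in nr_accesses terms), which is what makes applying schemes
between aggregations possible.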
Link: https://lkml.kernel.org/r/20230916020945.47296-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20230916020945.47296-2-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Jonathan Corbet
Cc: Shuah Khan
Cc: Steven Rostedt (Google)
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index b15cf47d2d29..79fef5145a4b 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -770,12 +770,13 @@ static void damon_split_region_at(struct damon_target *t,
 static bool __damos_valid_target(struct damon_region *r, struct damos *s)
 {
 	unsigned long sz;
+	unsigned int nr_accesses = r->nr_accesses_bp / 10000;

 	sz = damon_sz_region(r);
 	return s->pattern.min_sz_region <= sz &&
 		sz <= s->pattern.max_sz_region &&
-		s->pattern.min_nr_accesses <= r->nr_accesses &&
-		r->nr_accesses <= s->pattern.max_nr_accesses &&
+		s->pattern.min_nr_accesses <= nr_accesses &&
+		nr_accesses <= s->pattern.max_nr_accesses &&
 		s->pattern.min_age_region <= r->age &&
 		r->age <= s->pattern.max_age_region;
 }
-- cgit v1.2.3


From 42f994b71404b17abcd6b170de7a6aa95ffe5d4a Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Sat, 16 Sep 2023 02:09:40 +0000
Subject: mm/damon/core: implement scheme-specific apply interval

DAMON-based operation schemes are applied for every aggregation interval.
That was mainly because schemes were using nr_accesses, which became
complete for use only at every aggregation interval.  However, the
schemes are now using nr_accesses_bp, which is updated for each sampling
interval in a way that is reasonable to use.  Therefore, there is no
reason to apply schemes only at each aggregation interval.

The unnecessary alignment with the aggregation interval was also making
some use cases of DAMOS tricky.  Quota setting under a long aggregation
interval is one such example.  Suppose the aggregation interval is ten
seconds, and there is a scheme having a CPU quota of 100ms per 1s.  The
scheme will actually use 100ms per ten seconds, since it cannot be
applied before the next aggregation interval.  The feature is working as
intended, but the results might not be that intuitive for some users.
This could be fixed by updating the quota to 1s per 10s.  But in that
case, the CPU usage of DAMOS could look like spikes, and would actually
badly affect other CPU-sensitive workloads.

Implement a dedicated timing interval for each DAMON-based operation
scheme, namely apply_interval.  The interval will be aligned to the
sampling interval, and each scheme will be applied at its apply_interval.
The interval is set to 0 by default, which means the scheme should use
the aggregation interval instead.  This avoids any behavioral difference
for old users.
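For DAMON kernel API users, scheme construction with this change would
look like the sketch below (a hedged illustration: the pattern, quota,
and wmarks setup is assumed to exist elsewhere, and the interval value is
only an example):

	/*
	 * Make DAMOS check this scheme every 100 ms, independently of
	 * the aggregation interval.  Passing 0 as apply_interval_us
	 * would keep the old aggregation-interval-based timing.
	 */
	struct damos *s = damon_new_scheme(&pattern, DAMOS_PAGEOUT,
			100000 /* apply_interval_us */, &quota, &wmarks);

	if (!s)
		return -ENOMEM;
	damon_add_scheme(ctx, s);

Since damon_add_scheme() calls damos_set_next_apply_sis(), the first
apply timing of the scheme is set relative to the current
passed_sample_intervals, as the hunks below show.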
Link: https://lkml.kernel.org/r/20230916020945.47296-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- mm/damon/core.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 65 insertions(+), 7 deletions(-) (limited to 'mm/damon/core.c') diff --git a/mm/damon/core.c b/mm/damon/core.c index 79fef5145a4b..5eb649bd002f 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -313,7 +313,9 @@ static struct damos_quota *damos_quota_init_priv(struct damos_quota *quota) } struct damos *damon_new_scheme(struct damos_access_pattern *pattern, - enum damos_action action, struct damos_quota *quota, + enum damos_action action, + unsigned long apply_interval_us, + struct damos_quota *quota, struct damos_watermarks *wmarks) { struct damos *scheme; @@ -323,6 +325,13 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern, return NULL; scheme->pattern = *pattern; scheme->action = action; + scheme->apply_interval_us = apply_interval_us; + /* + * next_apply_sis will be set when kdamond starts. While kdamond is + * running, it will also updated when it is added to the DAMON context, + * or damon_attrs are updated. + */ + scheme->next_apply_sis = 0; INIT_LIST_HEAD(&scheme->filters); scheme->stat = (struct damos_stat){}; INIT_LIST_HEAD(&scheme->list); @@ -335,9 +344,21 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern, return scheme; } +static void damos_set_next_apply_sis(struct damos *s, struct damon_ctx *ctx) +{ + unsigned long sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + unsigned long apply_interval = s->apply_interval_us ? + s->apply_interval_us : ctx->attrs.aggr_interval; + + s->next_apply_sis = ctx->passed_sample_intervals + + apply_interval / sample_interval; +} + void damon_add_scheme(struct damon_ctx *ctx, struct damos *s) { list_add_tail(&s->list, &ctx->schemes); + damos_set_next_apply_sis(s, ctx); } static void damon_del_scheme(struct damos *s) @@ -558,6 +579,7 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) { unsigned long sample_interval = attrs->sample_interval ? attrs->sample_interval : 1; + struct damos *s; if (attrs->min_nr_regions < 3) return -EINVAL; @@ -573,6 +595,10 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) damon_update_monitoring_results(ctx, attrs); ctx->attrs = *attrs; + + damon_for_each_scheme(s, ctx) + damos_set_next_apply_sis(s, ctx); + return 0; } @@ -1094,14 +1120,29 @@ static void kdamond_apply_schemes(struct damon_ctx *c) struct damon_target *t; struct damon_region *r, *next_r; struct damos *s; + unsigned long sample_interval = c->attrs.sample_interval ? + c->attrs.sample_interval : 1; + bool has_schemes_to_apply = false; damon_for_each_scheme(s, c) { + if (c->passed_sample_intervals != s->next_apply_sis) + continue; + + s->next_apply_sis += + (s->apply_interval_us ? s->apply_interval_us : + c->attrs.aggr_interval) / sample_interval; + if (!s->wmarks.activated) continue; + has_schemes_to_apply = true; + damos_adjust_quota(c, s); } + if (!has_schemes_to_apply) + return; + damon_for_each_target(t, c) { damon_for_each_region_safe(r, next_r, t) damon_do_apply_schemes(c, t, r); @@ -1372,11 +1413,19 @@ static void kdamond_init_intervals_sis(struct damon_ctx *ctx) { unsigned long sample_interval = ctx->attrs.sample_interval ? 
 			ctx->attrs.sample_interval : 1;
+	unsigned long apply_interval;
+	struct damos *scheme;

 	ctx->passed_sample_intervals = 0;
 	ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval;
 	ctx->next_ops_update_sis = ctx->attrs.ops_update_interval /
 		sample_interval;
+
+	damon_for_each_scheme(scheme, ctx) {
+		apply_interval = scheme->apply_interval_us ?
+			scheme->apply_interval_us : ctx->attrs.aggr_interval;
+		scheme->next_apply_sis = apply_interval / sample_interval;
+	}
 }

 /*
@@ -1428,19 +1477,28 @@ static int kdamond_fn(void *data)
 		if (ctx->ops.check_accesses)
 			max_nr_accesses = ctx->ops.check_accesses(ctx);

-		sample_interval = ctx->attrs.sample_interval ?
-			ctx->attrs.sample_interval : 1;
 		if (ctx->passed_sample_intervals == next_aggregation_sis) {
-			ctx->next_aggregation_sis = next_aggregation_sis +
-				ctx->attrs.aggr_interval / sample_interval;
 			kdamond_merge_regions(ctx,
 					max_nr_accesses / 10,
 					sz_limit);
 			if (ctx->callback.after_aggregation &&
 					ctx->callback.after_aggregation(ctx))
 				break;
-			if (!list_empty(&ctx->schemes))
-				kdamond_apply_schemes(ctx);
+		}
+
+		/*
+		 * do kdamond_apply_schemes() after kdamond_merge_regions() if
+		 * possible, to reduce overhead
+		 */
+		if (!list_empty(&ctx->schemes))
+			kdamond_apply_schemes(ctx);
+
+		sample_interval = ctx->attrs.sample_interval ?
+			ctx->attrs.sample_interval : 1;
+		if (ctx->passed_sample_intervals == next_aggregation_sis) {
+			ctx->next_aggregation_sis = next_aggregation_sis +
+				ctx->attrs.aggr_interval / sample_interval;
+
 			kdamond_reset_aggregated(ctx);
 			kdamond_split_regions(ctx);
 			if (ctx->ops.reset_aggregated)
-- cgit v1.2.3


From 987ffa5a3858bee448dc791cf6f596790aea52a8 Mon Sep 17 00:00:00 2001
From: Huan Yang
Date: Wed, 20 Sep 2023 09:57:27 +0800
Subject: mm/damon/core: remove unnecessary si_meminfo invoke.

si_meminfo() reads and assigns more info than just the free/ram pages.
For the DAMOS_WMARK_FREE_MEM_RATE use case, getting only the free and
total ram pages is enough, and saves CPU.

Link: https://lkml.kernel.org/r/20230920015727.4482-1-link@vivo.com
Signed-off-by: Huan Yang
Reviewed-by: SeongJae Park
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index 5eb649bd002f..9f4f7c378cf3 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1326,12 +1326,10 @@ static bool kdamond_need_stop(struct damon_ctx *ctx)

 static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
 {
-	struct sysinfo i;
-
 	switch (metric) {
 	case DAMOS_WMARK_FREE_MEM_RATE:
-		si_meminfo(&i);
-		return i.freeram * 1000 / i.totalram;
+		return global_zone_page_state(NR_FREE_PAGES) * 1000 /
+			totalram_pages();
 	default:
 		break;
 	}
-- cgit v1.2.3


From d35963bfb05877455228ecec6b194f624489f96a Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Thu, 19 Oct 2023 19:49:21 +0000
Subject: mm/damon/core: avoid divide-by-zero during monitoring results update

When monitoring attributes are changed, DAMON updates the access rate of
the monitoring results accordingly.  For that, it divides some values by
the maximum nr_accesses.  However, due to the type of the related
variables, a simple division-based calculation of the divisor can return
zero.  As a result, a divide-by-zero is possible.  Fix it by using
damon_max_nr_accesses(), which handles the case.
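To see how the divisor can become zero: the intervals are unsigned long,
while the divisor was computed into an unsigned int.  The following
sketch shows the failure mode, plus my understanding of the helper's
approach (the helper body lives outside mm/damon/core.c, so treat it as
an assumption):

	/*
	 * The unsigned long quotient is truncated into an unsigned int,
	 * so it can become 0, e.g., when aggr_interval / sample_interval
	 * is a multiple of 2^32:
	 */
	unsigned int max_nr_accesses =
		attrs->aggr_interval / attrs->sample_interval;	/* can be 0 */

	/*
	 * damon_max_nr_accesses() presumably clamps the quotient before
	 * the truncation, so a nonzero quotient never becomes a zero
	 * divisor:
	 */
	return min(attrs->aggr_interval / attrs->sample_interval,
			(unsigned long)UINT_MAX);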
Link: https://lkml.kernel.org/r/20231019194924.100347-3-sj@kernel.org
Fixes: 2f5bef5a590b ("mm/damon/core: update monitoring results for new monitoring attributes")
Signed-off-by: SeongJae Park
Reported-by: Jakub Acs
Cc: [6.3+]
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index 9f4f7c378cf3..e194c8075235 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -500,20 +500,14 @@ static unsigned int damon_age_for_new_attrs(unsigned int age,
 static unsigned int damon_accesses_bp_to_nr_accesses(
 		unsigned int accesses_bp, struct damon_attrs *attrs)
 {
-	unsigned int max_nr_accesses =
-		attrs->aggr_interval / attrs->sample_interval;
-
-	return accesses_bp * max_nr_accesses / 10000;
+	return accesses_bp * damon_max_nr_accesses(attrs) / 10000;
 }

 /* convert nr_accesses to access ratio in bp (per 10,000) */
 static unsigned int damon_nr_accesses_to_accesses_bp(
 		unsigned int nr_accesses, struct damon_attrs *attrs)
 {
-	unsigned int max_nr_accesses =
-		attrs->aggr_interval / attrs->sample_interval;
-
-	return nr_accesses * 10000 / max_nr_accesses;
+	return nr_accesses * 10000 / damon_max_nr_accesses(attrs);
 }

 static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses,
-- cgit v1.2.3


From 62f76a7b53bfa2ecfe1570a5b1d0d574c576a56d Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Thu, 19 Oct 2023 19:49:24 +0000
Subject: mm/damon/core: avoid divide-by-zero from pseudo-moving window length calculation

When calculating the pseudo-moving access rate, DAMON divides some values
by the maximum nr_accesses.  However, due to the type of the related
variables, a simple division-based calculation of the divisor can return
zero.  As a result, a divide-by-zero is possible.  Fix it by using
damon_max_nr_accesses(), which handles the case.

Note that this is a fix for a commit that is not in the mainline but in
the mm tree.

Link: https://lkml.kernel.org/r/20231019194924.100347-6-sj@kernel.org
Fixes: ace30fb21af5 ("mm/damon/core: use pseudo-moving sum for nr_accesses_bp")
Reported-by: Jakub Acs
Signed-off-by: SeongJae Park
Signed-off-by: Andrew Morton
---
 mm/damon/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/damon/core.c')

diff --git a/mm/damon/core.c b/mm/damon/core.c
index e194c8075235..aa2dc7087cd9 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1665,7 +1665,7 @@ void damon_update_region_access_rate(struct damon_region *r, bool accessed,
 	 * aggr_interval, owing to validation of damon_set_attrs().
 	 */
 	if (attrs->sample_interval)
-		len_window = attrs->aggr_interval / attrs->sample_interval;
+		len_window = damon_max_nr_accesses(attrs);
 	r->nr_accesses_bp = damon_moving_sum(r->nr_accesses_bp,
 			r->last_nr_accesses * 10000, len_window,
 			accessed ? 10000 : 0);
-- cgit v1.2.3
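As a closing illustration, the window-alignment property that the last
several patches rely on can be checked with a tiny userspace program (a
standalone sketch that only reproduces the arithmetic of
damon_moving_sum(); not kernel code):

	#include <stdio.h>

	/* the same arithmetic as damon_moving_sum() above */
	static unsigned int moving_sum(unsigned int mvsum,
			unsigned int nomvsum, unsigned int len_window,
			unsigned int new_value)
	{
		return mvsum - nomvsum / len_window + new_value;
	}

	int main(void)
	{
		/* len_window = aggr / sample = 100 ms / 5 ms = 20 */
		unsigned int len_window = 20, bp = 0, nomvsum = 0;
		unsigned int nr_accesses = 0;

		/* two aggregation windows: fully idle, then fully accessed */
		for (int w = 0; w < 2; w++) {
			for (int i = 0; i < len_window; i++) {
				unsigned int accessed = w;	/* 0, then 1 */

				bp = moving_sum(bp, nomvsum, len_window,
						accessed ? 10000 : 0);
				nr_accesses += accessed;
			}
			/* at window-aligned times, bp == nr_accesses * 10000 */
			printf("window %d: bp=%u, nr_accesses*10000=%u\n",
					w, bp, nr_accesses * 10000);
			nomvsum = nr_accesses * 10000;	/* last_nr_accesses */
			nr_accesses = 0;
		}
		return 0;
	}

It prints matching pairs (0 and 0, then 200000 and 200000): whatever
intermediate values the pseudo-moving sum takes, it equals the
discrete-window sum at each aggregation boundary, which is why the
explicit nr_accesses_bp update could be removed above.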