Skip to content

Commit 4472edf

Browse files
sjp38 authored and akpm00 committed
mm/damon/core: use number of passed access sampling as a timer
DAMON sleeps for the sampling interval after each sampling, and checks whether the aggregation interval and the ops update interval have passed using ktime_get_coarse_ts64() and baseline timestamps for the intervals. That design is intended to make the operations occur at deterministic timing regardless of the time spent on each piece of work. However, it turned out that it is not that useful, and it incurs not-that-intuitive results. After all, timer functions, and especially the sleep functions that DAMON uses to wait for specific timing, are not necessarily strictly accurate. That is a legitimate design, so it is not a problem in itself. However, depending on such inaccuracies, nr_accesses can be larger than the aggregation interval divided by the sampling interval. For example, with the default setting (5 ms sampling interval and 100 ms aggregation interval) we frequently see regions having nr_accesses larger than 20. Also, if the execution of a DAMOS scheme takes a long time, the next aggregation could happen before a sufficient number of samples is collected. This is not what typical users would intuitively expect. Since access check sampling is the smallest unit of work in DAMON, using the number of passed sampling intervals as the DAMON-internal timer can easily avoid these problems. That is, convert the aggregation and ops update intervals to the numbers of sampling intervals that need to pass before those operations are executed, count the number of passed sampling intervals, and invoke the operations as soon as the specified number of sampling intervals has passed. Make the change. Note that this could cause a behavioral change for settings that use intervals that are not aligned to the sampling interval. For example, if the sampling interval is 5 ms and the aggregation interval is 12 ms, DAMON effectively uses 15 ms as its aggregation interval, because it checks whether the aggregation interval has passed only after sleeping for the sampling interval.
This change will make DAMON effectively use 10 ms as the aggregation interval, since it uses 'aggregation interval / sampling interval * sampling interval' as the effective aggregation interval, and we don't use floating point types. Typical users would have used aligned intervals, so this behavioral change is not expected to have any meaningful impact; just make this change. Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: SeongJae Park <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent aa5fe31 commit 4472edf

File tree

2 files changed

+59
-51
lines changed

2 files changed

+59
-51
lines changed

include/linux/damon.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -524,8 +524,18 @@ struct damon_ctx {
524524
struct damon_attrs attrs;
525525

526526
/* private: internal use only */
527-
struct timespec64 last_aggregation;
528-
struct timespec64 last_ops_update;
527+
/* number of sample intervals that passed since this context started */
528+
unsigned long passed_sample_intervals;
529+
/*
530+
* number of sample intervals that should be passed before next
531+
* aggregation
532+
*/
533+
unsigned long next_aggregation_sis;
534+
/*
535+
* number of sample intervals that should be passed before next ops
536+
* update
537+
*/
538+
unsigned long next_ops_update_sis;
529539

530540
/* public: */
531541
struct task_struct *kdamond;

mm/damon/core.c

Lines changed: 47 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -427,8 +427,10 @@ struct damon_ctx *damon_new_ctx(void)
427427
ctx->attrs.aggr_interval = 100 * 1000;
428428
ctx->attrs.ops_update_interval = 60 * 1000 * 1000;
429429

430-
ktime_get_coarse_ts64(&ctx->last_aggregation);
431-
ctx->last_ops_update = ctx->last_aggregation;
430+
ctx->passed_sample_intervals = 0;
431+
/* These will be set from kdamond_init_intervals_sis() */
432+
ctx->next_aggregation_sis = 0;
433+
ctx->next_ops_update_sis = 0;
432434

433435
mutex_init(&ctx->kdamond_lock);
434436

@@ -552,13 +554,21 @@ static void damon_update_monitoring_results(struct damon_ctx *ctx,
552554
*/
553555
int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs)
554556
{
557+
unsigned long sample_interval = attrs->sample_interval ?
558+
attrs->sample_interval : 1;
559+
555560
if (attrs->min_nr_regions < 3)
556561
return -EINVAL;
557562
if (attrs->min_nr_regions > attrs->max_nr_regions)
558563
return -EINVAL;
559564
if (attrs->sample_interval > attrs->aggr_interval)
560565
return -EINVAL;
561566

567+
ctx->next_aggregation_sis = ctx->passed_sample_intervals +
568+
attrs->aggr_interval / sample_interval;
569+
ctx->next_ops_update_sis = ctx->passed_sample_intervals +
570+
attrs->ops_update_interval / sample_interval;
571+
562572
damon_update_monitoring_results(ctx, attrs);
563573
ctx->attrs = *attrs;
564574
return 0;
@@ -732,38 +742,6 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
732742
return err;
733743
}
734744

735-
/*
736-
* damon_check_reset_time_interval() - Check if a time interval is elapsed.
737-
* @baseline: the time to check whether the interval has elapsed since
738-
* @interval: the time interval (microseconds)
739-
*
740-
* See whether the given time interval has passed since the given baseline
741-
* time. If so, it also updates the baseline to current time for next check.
742-
*
743-
* Return: true if the time interval has passed, or false otherwise.
744-
*/
745-
static bool damon_check_reset_time_interval(struct timespec64 *baseline,
746-
unsigned long interval)
747-
{
748-
struct timespec64 now;
749-
750-
ktime_get_coarse_ts64(&now);
751-
if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
752-
interval * 1000)
753-
return false;
754-
*baseline = now;
755-
return true;
756-
}
757-
758-
/*
759-
* Check whether it is time to flush the aggregated information
760-
*/
761-
static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
762-
{
763-
return damon_check_reset_time_interval(&ctx->last_aggregation,
764-
ctx->attrs.aggr_interval);
765-
}
766-
767745
/*
768746
* Reset the aggregated monitoring results ('nr_accesses' of each region).
769747
*/
@@ -1274,18 +1252,6 @@ static void kdamond_split_regions(struct damon_ctx *ctx)
12741252
last_nr_regions = nr_regions;
12751253
}
12761254

1277-
/*
1278-
* Check whether it is time to check and apply the operations-related data
1279-
* structures.
1280-
*
1281-
* Returns true if it is.
1282-
*/
1283-
static bool kdamond_need_update_operations(struct damon_ctx *ctx)
1284-
{
1285-
return damon_check_reset_time_interval(&ctx->last_ops_update,
1286-
ctx->attrs.ops_update_interval);
1287-
}
1288-
12891255
/*
12901256
* Check whether current monitoring should be stopped
12911257
*
@@ -1397,6 +1363,17 @@ static int kdamond_wait_activation(struct damon_ctx *ctx)
13971363
return -EBUSY;
13981364
}
13991365

1366+
static void kdamond_init_intervals_sis(struct damon_ctx *ctx)
1367+
{
1368+
unsigned long sample_interval = ctx->attrs.sample_interval ?
1369+
ctx->attrs.sample_interval : 1;
1370+
1371+
ctx->passed_sample_intervals = 0;
1372+
ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval;
1373+
ctx->next_ops_update_sis = ctx->attrs.ops_update_interval /
1374+
sample_interval;
1375+
}
1376+
14001377
/*
14011378
* The monitoring daemon that runs as a kernel thread
14021379
*/
@@ -1410,6 +1387,8 @@ static int kdamond_fn(void *data)
14101387

14111388
pr_debug("kdamond (%d) starts\n", current->pid);
14121389

1390+
kdamond_init_intervals_sis(ctx);
1391+
14131392
if (ctx->ops.init)
14141393
ctx->ops.init(ctx);
14151394
if (ctx->callback.before_start && ctx->callback.before_start(ctx))
@@ -1418,6 +1397,17 @@ static int kdamond_fn(void *data)
14181397
sz_limit = damon_region_sz_limit(ctx);
14191398

14201399
while (!kdamond_need_stop(ctx)) {
1400+
/*
1401+
* ctx->attrs and ctx->next_{aggregation,ops_update}_sis could
1402+
* be changed from after_wmarks_check() or after_aggregation()
1403+
* callbacks. Read the values here, and use those for this
1404+
* iteration. That is, damon_set_attrs() updated new values
1405+
* are respected from next iteration.
1406+
*/
1407+
unsigned long next_aggregation_sis = ctx->next_aggregation_sis;
1408+
unsigned long next_ops_update_sis = ctx->next_ops_update_sis;
1409+
unsigned long sample_interval = ctx->attrs.sample_interval;
1410+
14211411
if (kdamond_wait_activation(ctx))
14221412
break;
14231413

@@ -1427,12 +1417,17 @@ static int kdamond_fn(void *data)
14271417
ctx->callback.after_sampling(ctx))
14281418
break;
14291419

1430-
kdamond_usleep(ctx->attrs.sample_interval);
1420+
kdamond_usleep(sample_interval);
1421+
ctx->passed_sample_intervals++;
14311422

14321423
if (ctx->ops.check_accesses)
14331424
max_nr_accesses = ctx->ops.check_accesses(ctx);
14341425

1435-
if (kdamond_aggregate_interval_passed(ctx)) {
1426+
sample_interval = ctx->attrs.sample_interval ?
1427+
ctx->attrs.sample_interval : 1;
1428+
if (ctx->passed_sample_intervals == next_aggregation_sis) {
1429+
ctx->next_aggregation_sis = next_aggregation_sis +
1430+
ctx->attrs.aggr_interval / sample_interval;
14361431
kdamond_merge_regions(ctx,
14371432
max_nr_accesses / 10,
14381433
sz_limit);
@@ -1447,7 +1442,10 @@ static int kdamond_fn(void *data)
14471442
ctx->ops.reset_aggregated(ctx);
14481443
}
14491444

1450-
if (kdamond_need_update_operations(ctx)) {
1445+
if (ctx->passed_sample_intervals == next_ops_update_sis) {
1446+
ctx->next_ops_update_sis = next_ops_update_sis +
1447+
ctx->attrs.ops_update_interval /
1448+
sample_interval;
14511449
if (ctx->ops.update)
14521450
ctx->ops.update(ctx);
14531451
sz_limit = damon_region_sz_limit(ctx);

0 commit comments

Comments
 (0)