Skip to content

Commit c2b403a

Browse files
committed
[GR-50022] Refactor Native Image benchmarking to better control individual stages
PullRequest: graal/16688
2 parents 8cf9604 + c159722 commit c2b403a

File tree

6 files changed

+832
-454
lines changed

6 files changed

+832
-454
lines changed

compiler/mx.compiler/mx_graal_benchmark.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,15 @@
2626
import re
2727
import os
2828
from tempfile import mkstemp
29+
from typing import List, Optional
2930

3031
import mx
3132
import mx_benchmark
3233
import mx_sdk_benchmark
3334
import mx_compiler
3435
from mx_java_benchmarks import DaCapoBenchmarkSuite, ScalaDaCapoBenchmarkSuite
36+
from mx_benchmark import DataPoints
37+
from mx_sdk_benchmark import SUCCESSFUL_STAGE_PATTERNS
3538

3639
_suite = mx.suite('compiler')
3740

@@ -406,7 +409,45 @@ def benchSuiteName(self, bmSuiteArgs=None):
406409
mx_benchmark.add_bm_suite(ScalaDaCapoTimingBenchmarkSuite())
407410

408411

409-
class JMHNativeImageBenchmarkMixin(mx_sdk_benchmark.NativeImageBenchmarkMixin):
412+
class JMHNativeImageBenchmarkMixin(mx_benchmark.JMHBenchmarkSuiteBase, mx_sdk_benchmark.NativeImageBenchmarkMixin):
413+
414+
def get_jmh_result_file(self, bm_suite_args: List[str]) -> Optional[str]:
    """
    Return the JMH result file, or ``None`` for stages that must not produce JMH datapoints.

    The file-based rule (see :class:`mx_benchmark.JMHJsonRule`) creates datapoints whenever a result file is
    reported. When benchmarking a native image stage by stage (native mode without fallback mode), the file is
    therefore only reported for the ``agent``, ``instrument-run`` and ``run`` stages; for any other stage the rule
    would otherwise pick up the results left behind by a previous stage.

    :param bm_suite_args: the benchmark suite arguments
    :return: the value of the inherited ``get_jmh_result_file``, or ``None`` if the current stage is not one of
        the stages listed above
    """
    # The fallback_mode lookup is only reached in native mode (short-circuit evaluation).
    if self.is_native_mode(bm_suite_args) and not self.stages_info.fallback_mode:
        # StagesInfo may not be fully set up yet when this is first called; until then only the requested stage is
        # known. Once it is set up, the effective stage has to be used so that the JMH parsing rule is enabled
        # only if the stage really ran (a skipped stage would otherwise reuse an older result file).
        stage = (
            self.stages_info.effective_stage
            if self.stages_info.is_set_up
            else self.stages_info.requested_stage
        )
        if stage not in ("agent", "instrument-run", "run"):
            return None

    return super().get_jmh_result_file(bm_suite_args)
435+
436+
def fallback_mode_reason(self, bm_suite_args: List[str]) -> Optional[str]:
    """
    Explain why the fallback mode is required, or return ``None`` if it is not.

    JMH benchmarks have to fall back whenever ``--jmh-run-individually`` is given. That flag builds one native
    image per JMH benchmark, which cannot work with the default mode of running every stage on its own: unless all
    stages run at once, each benchmark overwrites the intermediate files of the benchmark before it.

    Performance data collection is limited in the fallback mode. Only data from the ``run`` stage can be collected
    reliably. Other metrics, such as image build statistics or profiling performance, cannot be collected reliably
    because they cannot be attributed to a specific individual JMH benchmark.

    :param bm_suite_args: the benchmark suite arguments
    :return: a human-readable reason, or ``None`` when ``--jmh-run-individually`` was not requested
    """
    if not self.jmhArgs(bm_suite_args).jmh_run_individually:
        return None
    return "--jmh-run-individually is not compatible with selecting individual stages"
410451

411452
def extra_image_build_argument(self, benchmark, args):
412453
# JMH does HotSpot-specific field offset checks in class initializers
@@ -462,6 +503,9 @@ def group(self):
462503
def subgroup(self):
463504
return "graal-compiler"
464505

506+
def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
    """
    Run the benchmarks through ``intercept_run`` instead of calling the inherited ``run`` directly.

    The proxy for the parent class is handed to ``intercept_run``, presumably so that the hook can invoke the
    inherited ``run`` itself -- verify against ``intercept_run``.
    """
    parent_suite = super()
    return self.intercept_run(parent_suite, benchmarks, bmSuiteArgs)
508+
465509

466510
mx_benchmark.add_bm_suite(JMHRunnerGraalCoreBenchmarkSuite())
467511

@@ -477,6 +521,9 @@ def group(self):
477521
def subgroup(self):
478522
return "graal-compiler"
479523

524+
def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
    """
    Delegate the benchmark run to ``intercept_run``.

    ``intercept_run`` receives the parent-class proxy together with the unchanged arguments; its return value is
    returned as is.
    """
    parent_suite = super()
    return self.intercept_run(parent_suite, benchmarks, bmSuiteArgs)
526+
480527

481528
mx_benchmark.add_bm_suite(JMHJarGraalCoreBenchmarkSuite())
482529

@@ -492,10 +539,16 @@ def group(self):
492539
def subgroup(self):
493540
return "graal-compiler"
494541

542+
def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
    """
    Delegate the benchmark run to ``intercept_run``.

    ``intercept_run`` receives the parent-class proxy together with the unchanged arguments; its return value is
    returned as is.
    """
    parent_suite = super()
    return self.intercept_run(parent_suite, benchmarks, bmSuiteArgs)
544+
495545
def filter_distribution(self, dist):
496546
return super(JMHDistGraalCoreBenchmarkSuite, self).filter_distribution(dist) and \
497547
not JMHDistWhiteboxBenchmarkSuite.is_whitebox_dependency(dist)
498548

549+
def successPatterns(self):
    """Return the inherited success patterns followed by ``SUCCESSFUL_STAGE_PATTERNS``."""
    inherited_patterns = super().successPatterns()
    return inherited_patterns + SUCCESSFUL_STAGE_PATTERNS
551+
499552

500553
mx_benchmark.add_bm_suite(JMHDistGraalCoreBenchmarkSuite())
501554

@@ -511,6 +564,9 @@ def group(self):
511564
def subgroup(self):
512565
return "graal-compiler"
513566

567+
def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
    """
    Delegate the benchmark run to ``intercept_run``.

    ``intercept_run`` receives the parent-class proxy together with the unchanged arguments; its return value is
    returned as is.
    """
    parent_suite = super()
    return self.intercept_run(parent_suite, benchmarks, bmSuiteArgs)
569+
514570
@staticmethod
515571
def is_whitebox_dependency(dist):
516572
return hasattr(dist, 'graalWhiteboxDistribution') and dist.graalWhiteboxDistribution
@@ -542,5 +598,8 @@ def getJMHEntry(self, bmSuiteArgs):
542598
assert self.dist
543599
return [mx.distribution(self.dist).mainClass]
544600

601+
def successPatterns(self):
    """Return the inherited success patterns followed by ``SUCCESSFUL_STAGE_PATTERNS``."""
    inherited_patterns = super().successPatterns()
    return inherited_patterns + SUCCESSFUL_STAGE_PATTERNS
603+
545604

546605
mx_benchmark.add_bm_suite(JMHDistWhiteboxBenchmarkSuite())

java-benchmarks/mx.java-benchmarks/mx_java_benchmarks.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636
import mx
3737
import mx_benchmark
38-
from mx_benchmark import ParserEntry
38+
from mx_benchmark import ParserEntry, DataPoints
3939
import mx_sdk_benchmark
4040
from mx_sdk_benchmark import NativeImageBundleBasedBenchmarkMixin
4141
import mx_sdk_vm_impl
@@ -161,12 +161,8 @@ def skip_agent_assertions(self, benchmark, args):
161161
else:
162162
return []
163163

164-
def stages(self, args):
165-
# This method overrides NativeImageMixin.stages
166-
parsed_arg = mx_sdk_benchmark.parse_prefixed_arg('-Dnative-image.benchmark.stages=', args, 'Native Image benchmark stages should only be specified once.')
167-
return parsed_arg.split(',') if parsed_arg else self.default_stages()
168-
169164
def default_stages(self):
165+
# This method is used by NativeImageMixin.stages
170166
raise NotImplementedError()
171167

172168

@@ -186,7 +182,6 @@ def get_application_startup_units(self):
186182
return 's'
187183

188184
def default_stages(self):
189-
# This method overrides NativeImageMixin.stages
190185
return ['instrument-image', 'instrument-run', 'image', 'run']
191186

192187
def uses_bundles(self):
@@ -420,6 +415,9 @@ def benchmarkList(self, bmSuiteArgs):
420415
def default_stages(self):
421416
return ['image']
422417

418+
def run(self, benchmarks, bmSuiteArgs):
    """
    Run the benchmarks through ``intercept_run`` and return its result.

    The proxy for the parent class is handed to ``intercept_run`` together with the unchanged arguments.

    :param benchmarks: the benchmarks to run, forwarded unchanged
    :param bmSuiteArgs: the benchmark suite arguments, forwarded unchanged
    :return: whatever ``intercept_run`` returns
    """
    # The result must be propagated: without the ``return`` this method always yields None and the caller never
    # sees what ``intercept_run`` produced.
    return self.intercept_run(super(), benchmarks, bmSuiteArgs)
420+
423421
def createCommandLineArgs(self, benchmarks, bmSuiteArgs):
424422
if benchmarks is None:
425423
mx.abort("Suite can only run a single benchmark per VM instance.")
@@ -1952,7 +1950,7 @@ def rules(self, out, benchmarks, bmSuiteArgs):
19521950
)
19531951
]
19541952

1955-
def run(self, benchmarks, bmSuiteArgs):
1953+
def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
19561954
results = super(RenaissanceBenchmarkSuite, self).run(benchmarks, bmSuiteArgs)
19571955
self.addAverageAcrossLatestResults(results)
19581956
return results
@@ -2029,7 +2027,7 @@ def getExtraIterationCount(self, iterations):
20292027
# We average over the last 2 out of 3 total iterations done by this suite.
20302028
return 2
20312029

2032-
def run(self, benchmarks, bmSuiteArgs):
2030+
def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
20332031
runretval = self.runAndReturnStdOut(benchmarks, bmSuiteArgs)
20342032
retcode, out, dims = runretval
20352033
self.validateStdoutWithDimensions(
@@ -2164,7 +2162,7 @@ def rules(self, out, benchmarks, bmSuiteArgs):
21642162
)
21652163
]
21662164

2167-
def run(self, benchmarks, bmSuiteArgs):
2165+
def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
21682166
results = super(AWFYBenchmarkSuite, self).run(benchmarks, bmSuiteArgs)
21692167
self.addAverageAcrossLatestResults(results)
21702168
return results

0 commit comments

Comments
 (0)