Skip to content

Commit

Permalink
Fix benchmark comparison in bench_download.py tool (#12234)
Browse files Browse the repository at this point in the history
The `./bench_download.py` tool stopped working after #12226 because the artifacts on benchmark workflow runs were renamed. This PR updates the artifact names that the tool looks for (as suggested in #12201 (comment)) and also adds some unit tests.

In many unit tests, I had to bump the date of the fetched data, because GH seems to delete workflow runs that are older than 2 years.

Note that yesterday, the [Benchmark Upload](https://github.com/enso-org/enso/actions/workflows/bench-upload.yml) workflow started printing a [warning that there is an unknown artifact name](https://github.com/enso-org/enso/actions/runs/13152367074/job/36701982751#step:6:1116).
  • Loading branch information
Akirathan authored Feb 5, 2025
1 parent 2297dca commit 10fe252
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 10 deletions.
5 changes: 5 additions & 0 deletions tools/performance/engine-benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ Run local tests with:
python -m unittest --verbose bench_tool/test*.py
```

Run a single test with:
```bash
python -m unittest --verbose bench_tool/test*.py -k <test_name>
```

## Relation to GH Actions

The `bench_download.py` script is used in
Expand Down
4 changes: 2 additions & 2 deletions tools/performance/engine-benchmarks/bench_tool/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ def workflow_ids(self) -> List[int]:

def artifact_names(self) -> List[str]:
if self == Source.ENGINE:
return ["Runtime Benchmark Report"]
return ["Runtime Benchmark Report", "benchmark-results.xml"]
elif self == Source.STDLIB:
return ["Enso JMH Benchmark Report"]
return ["Enso JMH Benchmark Report", "benchmark-results.xml"]
else:
raise ValueError(f"Unknown source {self}")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ async def test_get_bench_run(self):
Bench run does not need remote cache - it fetches just some metadata about GH artifacts.
:return:
"""
since = datetime.fromisoformat("2023-10-01")
until = datetime.fromisoformat("2023-10-05")
since = datetime.fromisoformat("2024-10-01")
until = datetime.fromisoformat("2024-10-05")
bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
self.assertGreater(len(bench_runs), 0)
bench_run = bench_runs[0]
Expand All @@ -58,9 +58,9 @@ async def test_get_bench_run(self):

async def test_get_bench_report(self):
# We choose an old date on purpose, so that the remote cache must be used, and is thus
# transitively tested.
since = datetime.fromisoformat("2023-10-01")
until = datetime.fromisoformat("2023-10-05")
# transitively tested. Note that GH deletes workflow runs that are older than 2 years.
since = datetime.fromisoformat("2024-10-01")
until = datetime.fromisoformat("2024-10-05")
bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
self.assertGreater(len(bench_runs), 0)
bench_run = bench_runs[0]
Expand All @@ -69,5 +69,21 @@ async def test_get_bench_report(self):
bench_report = await get_bench_report(bench_run, temp_dir, remote_cache)
self.assertIsNotNone(bench_report)
self.assertEqual(bench_run, bench_report.bench_run)
self.assertEqual(64, len(bench_report.label_score_dict))
self.assertEqual(70, len(bench_report.label_score_dict))

async def test_get_new_bench_report(self):
# Artifact names changed on 2025-02-03 - in PR https://github.com/enso-org/enso/pull/12226
# This test ensures that the artifact names were correctly updated
since = datetime.fromisoformat("2025-02-03")
until = datetime.fromisoformat("2025-02-05")
bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
self.assertGreater(len(bench_runs), 0)
bench_run = bench_runs[0]
remote_cache = ReadonlyRemoteCache()
with WithTempDir("test_get_bench_report") as temp_dir:
bench_report = await get_bench_report(bench_run, temp_dir, remote_cache)
self.assertIsNotNone(bench_report)
self.assertEqual(bench_run, bench_report.bench_run)
self.assertEqual(80, len(bench_report.label_score_dict))


Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ async def test_engine_website_regen(self):
remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT)
# Pull the repo if necessary
await remote_cache.initialize()
since = datetime.fromisoformat("2023-10-01")
until = datetime.fromisoformat("2023-10-25")
since = datetime.fromisoformat("2024-10-01")
until = datetime.fromisoformat("2024-10-25")
with WithTempDir("test_engine_website_regen") as temp_dir:
temp_dir_path = Path(temp_dir)
html_out = temp_dir_path.joinpath("engine-benchs.html")
Expand Down

0 comments on commit 10fe252

Please sign in to comment.