From 10fe25295a89b16f4d91c2640bc3b9be3bd6b64e Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 5 Feb 2025 12:53:22 +0100 Subject: [PATCH] Fix benchmark comparison in `bench_download.py` tool (#12234) `./bench_download.py` tool stopped working after https://github.com/enso-org/enso/pull/12226. Because the artifacts on benchmark workflow runs were renamed. This PR just renames the artifacts (suggested in https://github.com/enso-org/enso/pull/12201#issuecomment-2635659249) and also adds some unit tests. In many unit tests, I had to bump the date of the fetched data, because GH seems to delete workflow runs that are older than 2 years. Note that yesterday, [Benchmark Upload](https://github.com/enso-org/enso/actions/workflows/bench-upload.yml) workflow started printing a [warning that there is an unknown artifact name](https://github.com/enso-org/enso/actions/runs/13152367074/job/36701982751#step:6:1116) --- tools/performance/engine-benchmarks/README.md | 5 ++++ .../engine-benchmarks/bench_tool/__init__.py | 4 +-- .../bench_tool/test_bench_results.py | 28 +++++++++++++++---- .../bench_tool/test_website_regen.py | 4 +-- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/tools/performance/engine-benchmarks/README.md b/tools/performance/engine-benchmarks/README.md index 0363635f7da8..d45193c5342d 100644 --- a/tools/performance/engine-benchmarks/README.md +++ b/tools/performance/engine-benchmarks/README.md @@ -35,6 +35,11 @@ Run local tests with: python -m unittest --verbose bench_tool/test*.py ``` +Run a single test with: +```bash +python -m unittest --verbose bench_tool/test*.py -k +``` + ## Relation to GH Actions The `bench_download.py` script is used in diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py index 918676fd6283..11b88f08945b 100644 --- a/tools/performance/engine-benchmarks/bench_tool/__init__.py +++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py @@ -58,9 +58,9 @@ def workflow_ids(self) -> List[int]: def artifact_names(self) -> List[str]: if self == Source.ENGINE: - return ["Runtime Benchmark Report"] + return ["Runtime Benchmark Report", "benchmark-results.xml"] elif self == Source.STDLIB: - return ["Enso JMH Benchmark Report"] + return ["Enso JMH Benchmark Report", "benchmark-results.xml"] else: raise ValueError(f"Unknown source {self}") diff --git a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py index d15e9c83f7d8..17aff6e163d5 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py @@ -46,8 +46,8 @@ async def test_get_bench_run(self): Bench run does not need remote cache - it fetches just some metadata about GH artifacts. :return: """ - since = datetime.fromisoformat("2023-10-01") - until = datetime.fromisoformat("2023-10-05") + since = datetime.fromisoformat("2024-10-01") + until = datetime.fromisoformat("2024-10-05") bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID) self.assertGreater(len(bench_runs), 0) bench_run = bench_runs[0] @@ -58,9 +58,9 @@ async def test_get_bench_run(self): async def test_get_bench_report(self): # We choose an old date on purpose, so that the remote cache must be used, and is thus - # transitively tested. - since = datetime.fromisoformat("2023-10-01") - until = datetime.fromisoformat("2023-10-05") + # transitively tested. Note that GH deletes workflow runs that are older than 2 years. + since = datetime.fromisoformat("2024-10-01") + until = datetime.fromisoformat("2024-10-05") bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID) self.assertGreater(len(bench_runs), 0) bench_run = bench_runs[0] @@ -69,5 +69,21 @@ async def test_get_bench_report(self): bench_report = await get_bench_report(bench_run, temp_dir, remote_cache) self.assertIsNotNone(bench_report) self.assertEqual(bench_run, bench_report.bench_run) - self.assertEqual(64, len(bench_report.label_score_dict)) + self.assertEqual(70, len(bench_report.label_score_dict)) + + async def test_get_new_bench_report(self): + # Artifact names changed on 2025-02-03 - in PR https://github.com/enso-org/enso/pull/12226 + # This test ensures that the artifact names were correctly updated + since = datetime.fromisoformat("2025-02-03") + until = datetime.fromisoformat("2025-02-05") + bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID) + self.assertGreater(len(bench_runs), 0) + bench_run = bench_runs[0] + remote_cache = ReadonlyRemoteCache() + with WithTempDir("test_get_bench_report") as temp_dir: + bench_report = await get_bench_report(bench_run, temp_dir, remote_cache) + self.assertIsNotNone(bench_report) + self.assertEqual(bench_run, bench_report.bench_run) + self.assertEqual(80, len(bench_report.label_score_dict)) + diff --git a/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py b/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py index 58cef455905a..8c3bc5152ae2 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py @@ -17,8 +17,8 @@ async def test_engine_website_regen(self): remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT) # Pull the repo if necessary await remote_cache.initialize() - since = datetime.fromisoformat("2023-10-01") - until = datetime.fromisoformat("2023-10-25") + since = datetime.fromisoformat("2024-10-01") + until = datetime.fromisoformat("2024-10-25") with WithTempDir("test_engine_website_regen") as temp_dir: temp_dir_path = Path(temp_dir) html_out = temp_dir_path.joinpath("engine-benchs.html")