Fix benchmark comparison in bench_download.py tool (#12234)

The `./bench_download.py` tool stopped working after #12226 because the artifacts on benchmark workflow runs were renamed. This PR just updates the artifact names that the tool looks for (as suggested in #12201 (comment)) and also adds some unit tests. In many unit tests, I had to bump the date of the fetched data, because GH seems to delete workflow runs that are older than 2 years. Note that yesterday, the [Benchmark Upload](https://github.com/enso-org/enso/actions/workflows/bench-upload.yml) workflow started printing a [warning that there is an unknown artifact name](https://github.com/enso-org/enso/actions/runs/13152367074/job/36701982751#step:6:1116).
enso-org · Feb 5, 2025 · 10fe252 · 10fe252
1 parent 2297dca
commit 10fe252
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 10 deletions.
diff --git a/tools/performance/engine-benchmarks/README.md b/tools/performance/engine-benchmarks/README.md
@@ -35,6 +35,11 @@ Run local tests with:
 python -m unittest --verbose bench_tool/test*.py
 ```
 
+Run a single test with:
+```bash
+python -m unittest --verbose bench_tool/test*.py -k <test_name>
+```
+
 ## Relation to GH Actions
 
 The `bench_download.py` script is used in

diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py
@@ -58,9 +58,9 @@ def workflow_ids(self) -> List[int]:
 
     def artifact_names(self) -> List[str]:
         if self == Source.ENGINE:
-            return ["Runtime Benchmark Report"]
+            return ["Runtime Benchmark Report", "benchmark-results.xml"]
         elif self == Source.STDLIB:
-            return ["Enso JMH Benchmark Report"]
+            return ["Enso JMH Benchmark Report", "benchmark-results.xml"]
         else:
             raise ValueError(f"Unknown source {self}")
 

diff --git a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py
@@ -46,8 +46,8 @@ async def test_get_bench_run(self):
         Bench run does not need remote cache - it fetches just some metadata about GH artifacts.
         :return:
         """
-        since = datetime.fromisoformat("2023-10-01")
-        until = datetime.fromisoformat("2023-10-05")
+        since = datetime.fromisoformat("2024-10-01")
+        until = datetime.fromisoformat("2024-10-05")
         bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
         self.assertGreater(len(bench_runs), 0)
         bench_run = bench_runs[0]
@@ -58,9 +58,9 @@ async def test_get_bench_run(self):
 
     async def test_get_bench_report(self):
         # We choose an old date on purpose, so that the remote cache must be used, and is thus
-        # transitively tested.
-        since = datetime.fromisoformat("2023-10-01")
-        until = datetime.fromisoformat("2023-10-05")
+        # transitively tested. Note that GH deletes workflow runs that are older than 2 years.
+        since = datetime.fromisoformat("2024-10-01")
+        until = datetime.fromisoformat("2024-10-05")
         bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
         self.assertGreater(len(bench_runs), 0)
         bench_run = bench_runs[0]
@@ -69,5 +69,21 @@ async def test_get_bench_report(self):
             bench_report = await get_bench_report(bench_run, temp_dir, remote_cache)
             self.assertIsNotNone(bench_report)
             self.assertEqual(bench_run, bench_report.bench_run)
-            self.assertEqual(64, len(bench_report.label_score_dict))
+            self.assertEqual(70, len(bench_report.label_score_dict))
+
+    async def test_get_new_bench_report(self):
+        # Artifact names changed on 2025-02-03 - in PR https://github.com/enso-org/enso/pull/12226
+        # This test ensures that the artifact names were correctly updated
+        since = datetime.fromisoformat("2025-02-03")
+        until = datetime.fromisoformat("2025-02-05")
+        bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
+        self.assertGreater(len(bench_runs), 0)
+        bench_run = bench_runs[0]
+        remote_cache = ReadonlyRemoteCache()
+        with WithTempDir("test_get_bench_report") as temp_dir:
+            bench_report = await get_bench_report(bench_run, temp_dir, remote_cache)
+            self.assertIsNotNone(bench_report)
+            self.assertEqual(bench_run, bench_report.bench_run)
+            self.assertEqual(80, len(bench_report.label_score_dict))
+
 
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py b/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py
@@ -17,8 +17,8 @@ async def test_engine_website_regen(self):
         remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT)
         # Pull the repo if necessary
         await remote_cache.initialize()
-        since = datetime.fromisoformat("2023-10-01")
-        until = datetime.fromisoformat("2023-10-25")
+        since = datetime.fromisoformat("2024-10-01")
+        until = datetime.fromisoformat("2024-10-25")
         with WithTempDir("test_engine_website_regen") as temp_dir:
             temp_dir_path = Path(temp_dir)
             html_out = temp_dir_path.joinpath("engine-benchs.html")