From 10fe25295a89b16f4d91c2640bc3b9be3bd6b64e Mon Sep 17 00:00:00 2001
From: Pavel Marek <pavel.marek@enso.org>
Date: Wed, 5 Feb 2025 12:53:22 +0100
Subject: [PATCH] Fix benchmark comparison in `bench_download.py` tool (#12234)

`./bench_download.py` tool stopped working after https://github.com/enso-org/enso/pull/12226. Because the artifacts on benchmark workflow runs were renamed. This PR just renames the artifacts (suggested in https://github.com/enso-org/enso/pull/12201#issuecomment-2635659249) and also adds some unit tests.

In many unit tests, I had to bump the date of the fetched data, because GH seems to delete workflow runs that are older than 2 years.

Note that yesterday, [Benchmark Upload](https://github.com/enso-org/enso/actions/workflows/bench-upload.yml) workflow started printing a [warning that there is an unknown artifact name](https://github.com/enso-org/enso/actions/runs/13152367074/job/36701982751#step:6:1116)
---
 tools/performance/engine-benchmarks/README.md |  5 ++++
 .../engine-benchmarks/bench_tool/__init__.py  |  4 +--
 .../bench_tool/test_bench_results.py          | 28 +++++++++++++++----
 .../bench_tool/test_website_regen.py          |  4 +--
 4 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/tools/performance/engine-benchmarks/README.md b/tools/performance/engine-benchmarks/README.md
index 0363635f7da8..d45193c5342d 100644
--- a/tools/performance/engine-benchmarks/README.md
+++ b/tools/performance/engine-benchmarks/README.md
@@ -35,6 +35,11 @@ Run local tests with:
 python -m unittest --verbose bench_tool/test*.py
 ```
 
+Run a single test with:
+```bash
+python -m unittest --verbose bench_tool/test*.py -k <test_name>
+```
+
 ## Relation to GH Actions
 
 The `bench_download.py` script is used in
diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py
index 918676fd6283..11b88f08945b 100644
--- a/tools/performance/engine-benchmarks/bench_tool/__init__.py
+++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py
@@ -58,9 +58,9 @@ def workflow_ids(self) -> List[int]:
 
     def artifact_names(self) -> List[str]:
         if self == Source.ENGINE:
-            return ["Runtime Benchmark Report"]
+            return ["Runtime Benchmark Report", "benchmark-results.xml"]
         elif self == Source.STDLIB:
-            return ["Enso JMH Benchmark Report"]
+            return ["Enso JMH Benchmark Report", "benchmark-results.xml"]
         else:
             raise ValueError(f"Unknown source {self}")
 
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py
index d15e9c83f7d8..17aff6e163d5 100644
--- a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py
+++ b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py
@@ -46,8 +46,8 @@ async def test_get_bench_run(self):
         Bench run does not need remote cache - it fetches just some metadata about GH artifacts.
         :return:
         """
-        since = datetime.fromisoformat("2023-10-01")
-        until = datetime.fromisoformat("2023-10-05")
+        since = datetime.fromisoformat("2024-10-01")
+        until = datetime.fromisoformat("2024-10-05")
         bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
         self.assertGreater(len(bench_runs), 0)
         bench_run = bench_runs[0]
@@ -58,9 +58,9 @@ async def test_get_bench_run(self):
 
     async def test_get_bench_report(self):
         # We choose an old date on purpose, so that the remote cache must be used, and is thus
-        # transitively tested.
-        since = datetime.fromisoformat("2023-10-01")
-        until = datetime.fromisoformat("2023-10-05")
+        # transitively tested. Note that GH deletes workflow runs that are older than 2 years.
+        since = datetime.fromisoformat("2024-10-01")
+        until = datetime.fromisoformat("2024-10-05")
         bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
         self.assertGreater(len(bench_runs), 0)
         bench_run = bench_runs[0]
@@ -69,5 +69,21 @@ async def test_get_bench_report(self):
             bench_report = await get_bench_report(bench_run, temp_dir, remote_cache)
             self.assertIsNotNone(bench_report)
             self.assertEqual(bench_run, bench_report.bench_run)
-            self.assertEqual(64, len(bench_report.label_score_dict))
+            self.assertEqual(70, len(bench_report.label_score_dict))
+    
+    async def test_get_new_bench_report(self):
+        # Artifact names changed on 2025-02-03 - in PR https://github.com/enso-org/enso/pull/12226
+        # This test ensures that the artifact names were correctly updated
+        since = datetime.fromisoformat("2025-02-03")
+        until = datetime.fromisoformat("2025-02-05")
+        bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
+        self.assertGreater(len(bench_runs), 0)
+        bench_run = bench_runs[0]
+        remote_cache = ReadonlyRemoteCache()
+        with WithTempDir("test_get_bench_report") as temp_dir:
+            bench_report = await get_bench_report(bench_run, temp_dir, remote_cache)
+            self.assertIsNotNone(bench_report)
+            self.assertEqual(bench_run, bench_report.bench_run)
+            self.assertEqual(80, len(bench_report.label_score_dict))
+
 
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py b/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py
index 58cef455905a..8c3bc5152ae2 100644
--- a/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py
+++ b/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py
@@ -17,8 +17,8 @@ async def test_engine_website_regen(self):
         remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT)
         # Pull the repo if necessary
         await remote_cache.initialize()
-        since = datetime.fromisoformat("2023-10-01")
-        until = datetime.fromisoformat("2023-10-25")
+        since = datetime.fromisoformat("2024-10-01")
+        until = datetime.fromisoformat("2024-10-25")
         with WithTempDir("test_engine_website_regen") as temp_dir:
             temp_dir_path = Path(temp_dir)
             html_out = temp_dir_path.joinpath("engine-benchs.html")