diff --git a/Taskfile.yml b/Taskfile.yml
index d5a9d6584..18ea676e5 100644
--- a/Taskfile.yml
+++ b/Taskfile.yml
@@ -49,6 +49,16 @@ tasks:
           poetry run python -W ignore utils/taskcluster_downloader.py
           --mode=model {{.CLI_ARGS}}
 
+  config-generator:
+    desc: Create a training config for a language pair
+    summary: |
+      Generates an initial training config for the given source and target language.
+      Example: `task config-generator -- en fi`
+    deps: [poetry-install-utils]
+    cmds:
+      - >-
+          PYTHONPATH=$(pwd) poetry run python -W ignore utils/config_generator.py {{.CLI_ARGS}}
+
   opuscleaner:
     desc: Run the opuscleaner tool.
     deps: [poetry-install-opuscleaner]
diff --git a/configs/bs-en-spring-2024.yml b/configs/bs-en-spring-2024.yml
new file mode 100644
index 000000000..b3b501500
--- /dev/null
+++ b/configs/bs-en-spring-2024.yml
@@ -0,0 +1,118 @@
+# The initial configuration was generated using:
+# task config-generator -- bs en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: bs
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-bos
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   94,895,603 sentences (OPUS datasets only; mtdata entries are not counted)
+  #
+  # Skipped datasets:
+  #  - opus_MultiHPLT/v1.1 - ignored datasets (240,013 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-bos-eng - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-bos-eng - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-bos - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-bos_BA-eng - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       79,334,034 sentences
+  - opus_OpenSubtitles/v2018 #                           14,041,160 sentences
+  - opus_XLEnt/v1.2 #                                       266,696 sentences
+  - opus_Tanzil/v1 #                                        246,913 sentences
+  - opus_HPLT/v1.1 #                                        240,015 sentences
+  - opus_WikiMatrix/v1 #                                    210,691 sentences
+  - opus_CCAligned/v1 #                                     192,099 sentences
+  - opus_GNOME/v1 #                                         164,960 sentences
+  - opus_SETIMES/v2 #                                       138,387 sentences
+  - opus_wikimedia/v20230407 #                               28,167 sentences
+  - opus_QED/v2.0a #                                         12,541 sentences
+  - opus_TED2020/v1 #                                        11,638 sentences
+  - opus_NeuLab-TedTalks/v1 #                                 6,136 sentences
+  - opus_EUbookshop/v2 #                                        558 sentences
+  - opus_Tatoeba/v2023-04-12 #                                  515 sentences
+  - opus_tldr-pages/v2023-08-29 #                               479 sentences
+  - opus_ELRC-3047-wikipedia_health/v1 #                        205 sentences
+  - opus_ELRC-wikipedia_health/v1 #                             205 sentences
+  - opus_ELRC_2922/v1 #                                         204 sentences
+  - mtdata_Neulab-tedtalks_test-1-eng-bos #             ~3,117,009 sentences (352.2 MB)
+
+  # The source-language (bs) monolingual data contains:
+  #   ~8,982,298 sentences
+  mono-src:
+  - news-crawl_news.2018  #              ~8,849 sentences (1.0M)
+  - news-crawl_news.2019 #            ~920,353 sentences (104M)
+  - news-crawl_news.2020 #          ~1,734,513 sentences (196M)
+  - news-crawl_news.2021 #          ~2,079,646 sentences (235M)
+  - news-crawl_news.2022 #          ~2,132,743 sentences (241M)
+  - news-crawl_news.2023 #          ~2,106,194 sentences (238M)
+
+  # The target-language (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/cs-en-spring-2024.yml b/configs/cs-en-spring-2024.yml
new file mode 100644
index 000000000..960dfe671
--- /dev/null
+++ b/configs/cs-en-spring-2024.yml
@@ -0,0 +1,234 @@
+# The initial configuration was generated using:
+# task config-generator -- cs en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: cs
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Lindat-khresmoi_summary_dev-2-ces-eng
+  - mtdata_Neulab-tedtalks_dev-1-eng-ces
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt19
+  - sacrebleu_aug-mix_wmt18/test-ts
+  - sacrebleu_aug-mix_wmt16
+  - sacrebleu_aug-mix_wmt14
+  - sacrebleu_aug-mix_wmt13
+  - sacrebleu_aug-mix_wmt11
+  - sacrebleu_aug-mix_wmt09
+  - sacrebleu_aug-mix_wmt08/nc
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt20
+  - sacrebleu_wmt18
+  - sacrebleu_wmt17
+  - sacrebleu_wmt15
+  - sacrebleu_wmt14/full
+  - sacrebleu_wmt12
+  - sacrebleu_wmt10
+  - sacrebleu_wmt08
+  - sacrebleu_multi30k/2016
+
+  # The training data contains:
+  #   213,550,488 sentences (OPUS datasets only; mtdata entries are not counted)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (56,307,029 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - opus_WikiTitles/v3 - ignored datasets (0 sentences)
+  #  - mtdata_ELRC-euipo_2017-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-emea-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-ces-eng - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-ces - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-ces-eng - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-ces-eng - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-ces - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-3-eng-ces - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-ces - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-ces - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-ces - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-ces - duplicate with opus
+  #  - mtdata_Statmt-europarl-9-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-14-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-15-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-16-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-europarl-10-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-ces_CZ-eng - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-ces-eng - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       56,307,029 sentences
+  - opus_ParaCrawl/v9 #                                  50,633,505 sentences
+  - opus_OpenSubtitles/v2018 #                           42,346,436 sentences
+  - opus_StanfordNLP-NMT/v1.0 #                          15,793,121 sentences
+  - opus_ELRC-EMEA/v1 #                                  12,891,707 sentences
+  - opus_CCAligned/v1 #                                  12,730,121 sentences
+  - opus_DGT/v2019 #                                      5,207,753 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   4,813,030 sentences
+  - opus_XLEnt/v1.2 #                                     3,894,132 sentences
+  - opus_JRC-Acquis/v3.0 #                                1,273,411 sentences
+  - opus_ELRC-5067-SciPar/v1 #                            1,064,385 sentences
+  - opus_EMEA/v3 #                                        1,053,385 sentences
+  - opus_ELRC-2713-EMEA/v1 #                                779,083 sentences
+  - opus_ELRC_2682/v1 #                                     779,082 sentences
+  - opus_Europarl/v8 #                                      647,095 sentences
+  - opus_WikiMatrix/v1 #                                    519,195 sentences
+  - opus_EUbookshop/v2 #                                    455,472 sentences
+  - opus_QED/v2.0a #                                        441,508 sentences
+  - opus_ELITR-ECA/v1 #                                     295,788 sentences
+  - opus_Tanzil/v1 #                                        233,399 sentences
+  - opus_News-Commentary/v16 #                              218,509 sentences
+  - opus_TED2020/v1 #                                       170,611 sentences
+  - opus_wikimedia/v20230407 #                              146,717 sentences
+  - opus_KDE4/v2 #                                          134,071 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        129,652 sentences
+  - opus_NeuLab-TedTalks/v1 #                               111,107 sentences
+  - opus_ECB/v1 #                                            63,716 sentences
+  - opus_bible-uedin/v1 #                                    62,151 sentences
+  - opus_WMT-News/v2019 #                                    44,859 sentences
+  - opus_Tatoeba/v2023-04-12 #                               34,628 sentences
+  - opus_PHP/v1 #                                            32,983 sentences
+  - opus_Wikipedia/v1.0 #                                    27,723 sentences
+  - opus_ELRC-3564-EUR_LEX_covid/v1 #                        22,637 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,637 sentences
+  - opus_GlobalVoices/v2018q4 #                              18,876 sentences
+  - opus_ELRC-427-Electronic_Exchange_/v1 #                  17,357 sentences
+  - opus_ELRC-2012-EUIPO_2017/v1 #                           15,945 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                15,945 sentences
+  - opus_ELRC-antibiotic/v1 #                                15,678 sentences
+  - opus_ELRC-2874-EU_publications_medi/v1 #                 13,161 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,161 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            11,142 sentences
+  - opus_EUconst/v1 #                                         9,953 sentences
+  - opus_ELRC-3605-presscorner_covid/v1 #                     6,229 sentences
+  - opus_ELRC-2406-Czech_Supreme_Audit/v1 #                   4,771 sentences
+  - opus_ELRC_3382/v1 #                                       3,722 sentences
+  - opus_TildeMODEL/v2018 #                                   3,100 sentences
+  - opus_ELRC-2405-Czech_Supreme_Audit/v1 #                   2,868 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,559 sentences
+  - opus_ELRC-3463-EC_EUROPA_covid/v1 #                       2,386 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,386 sentences
+  - opus_ELRC-40-Information_Portal_C/v1 #                    1,828 sentences
+  - opus_ELRC-Information_Portal/v1 #                         1,828 sentences
+  - opus_ELRC-3062-wikipedia_health/v1 #                      1,146 sentences
+  - opus_ELRC-wikipedia_health/v1 #                           1,146 sentences
+  - opus_ELRC_2922/v1 #                                       1,145 sentences
+  - opus_ELRC-3201-antibiotic/v1 #                              965 sentences
+  - opus_ELRC-3292-EUROPARL_covid/v1 #                          557 sentences
+  - opus_ELRC-2749-vaccination/v1 #                             520 sentences
+  - opus_ELRC-vaccination/v1 #                                  520 sentences
+  - opus_ELRC-2404-Czech_Supreme_Audit/v1 #                     403 sentences
+  - opus_ELRC_2923/v1 #                                         319 sentences
+  - opus_ELRC-2407-Czech_Supreme_Audit/v1 #                     234 sentences
+  - mtdata_ELRC-information_portal_czech_president_czech_castle-1-ces-eng
+  - mtdata_ELRC-electronic_exchange_social_security_information-1-ces-eng
+  - mtdata_ELRC-czech_supreme_audit_office_2018_reports-1-ces-eng
+  - mtdata_ELRC-czech_supreme_audit_office_2008_2017_reports-1-ces-eng
+  - mtdata_ELRC-czech_supreme_audit_office_2003_2017_press_releases-1-ces-eng
+  - mtdata_ELRC-czech_supreme_audit_office_2018_press_releases-1-ces-eng
+  - mtdata_ELRC-eu_publications_medical_v2-1-ces-eng
+  - mtdata_EU-eac_forms-1-ces-eng #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-ces-eng #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-ces-eng #                            ~533,693 sentences (60.3 MB)
+  - mtdata_Lindat-khresmoi_summary_test-2-ces-eng #        ~11,808 sentences (1.3 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-ces #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Statmt-commoncrawl_wmt13-1-ces-eng #         ~8,126,649 sentences (918.3 MB)
+  - mtdata_Statmt-europarl_wmt13-7-ces-eng #            ~5,819,755 sentences (657.6 MB)
+  - mtdata_Statmt-news_commentary_wmt18-13-ces-eng #    ~1,001,393 sentences (113.2 MB)
+  - mtdata_Statmt-wiki_titles-1-ces-eng #                  ~45,242 sentences (5.1 MB)
+  - mtdata_Statmt-wiki_titles-2-ces-eng #                  ~47,995 sentences (5.4 MB)
+  - mtdata_Tilde-eesc-2017-ces-eng #                    ~1,157,475 sentences (130.8 MB)
+  - mtdata_Tilde-ema-2016-ces-eng #                       ~244,524 sentences (27.6 MB)
+  - mtdata_Tilde-rapid-2019-ces-eng #                     ~255,063 sentences (28.8 MB)
+
+  # The source-language (cs) monolingual data contains:
+  #   ~55,777,868 sentences
+  mono-src:
+  - news-crawl_news.2007  #             ~34,513 sentences (3.9M)
+  - news-crawl_news.2008 #          ~1,840,707 sentences (208M)
+  - news-crawl_news.2009 #          ~2,079,646 sentences (235M)
+  - news-crawl_news.2010 #          ~1,247,787 sentences (141M)
+  - news-crawl_news.2011 #          ~3,185,840 sentences (360M)
+  - news-crawl_news.2012 #          ~2,964,601 sentences (335M)
+  - news-crawl_news.2013 #          ~3,389,380 sentences (383M)
+  - news-crawl_news.2014 #          ~2,973,451 sentences (336M)
+  - news-crawl_news.2015 #          ~3,026,548 sentences (342M)
+  - news-crawl_news.2016 #          ~2,159,292 sentences (244M)
+  - news-crawl_news.2017 #          ~2,849,557 sentences (322M)
+  - news-crawl_news.2018 #          ~2,637,168 sentences (298M)
+  - news-crawl_news.2019 #          ~5,513,274 sentences (623M)
+  - news-crawl_news.2020 #          ~7,451,327 sentences (842M)
+  - news-crawl_news.2021 #          ~5,265,486 sentences (595M)
+  - news-crawl_news.2022 #          ~3,884,955 sentences (439M)
+  - news-crawl_news.2023 #          ~5,274,336 sentences (596M)
+
+  # The target-language (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/da-en-spring-2024.yml b/configs/da-en-spring-2024.yml
new file mode 100644
index 000000000..336f0c593
--- /dev/null
+++ b/configs/da-en-spring-2024.yml
@@ -0,0 +1,235 @@
+# The initial configuration was generated using:
+# task config-generator -- da en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: da
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-dan
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   161,668,955 sentences (OPUS datasets only; mtdata entries are not counted)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (52,273,664 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-www.norden.org-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-mst.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-ufm.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.dst.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.dma.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.geus.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-naturstyrelsen.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.trm.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-um.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.aarhus2017.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.odense.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.visitvejle.com-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.visitdenmark.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-slks.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-natmus.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-gallery_denmark-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-royal_danish_library-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-danish_fsa-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-uk.fm.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-spillemyndigheden.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-rigsrevisionen.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-emea-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-dan-eng - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-dan - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-dan-eng - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-dan-eng - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-dan - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-dan - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-dan - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-dan - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-dan - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-dan-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-dan_DK-eng - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-dan-eng - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       52,273,664 sentences
+  - opus_ParaCrawl/v9 #                                  34,207,840 sentences
+  - opus_OpenSubtitles/v2018 #                           14,474,569 sentences
+  - opus_ELRC-4248-NTEU_TierA/v1 #                       13,756,130 sentences
+  - opus_ELRC-EMEA/v1 #                                  12,556,334 sentences
+  - opus_CCAligned/v1 #                                  10,738,610 sentences
+  - opus_DGT/v2019 #                                      5,152,323 sentences
+  - opus_EUbookshop/v2 #                                  4,980,755 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   3,084,707 sentences
+  - opus_XLEnt/v1.2 #                                     3,042,401 sentences
+  - opus_Europarl/v8 #                                    1,991,647 sentences
+  - opus_EMEA/v3 #                                        1,093,780 sentences
+  - opus_JRC-Acquis/v3.0 #                                  808,916 sentences
+  - opus_ELRC-2716-EMEA/v1 #                                775,676 sentences
+  - opus_ELRC_2682/v1 #                                     775,675 sentences
+  - opus_WikiMatrix/v1 #                                    436,052 sentences
+  - opus_KDE4/v2 #                                          194,410 sentences
+  - opus_QED/v2.0a #                                        175,384 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        145,352 sentences
+  - opus_ECB/v1 #                                           138,154 sentences
+  - opus_ELITR-ECA/v1 #                                     135,384 sentences
+  - opus_TED2020/v1 #                                        72,113 sentences
+  - opus_wikimedia/v20230407 #                               69,969 sentences
+  - opus_bible-uedin/v1 #                                    62,113 sentences
+  - opus_NeuLab-TedTalks/v1 #                                48,462 sentences
+  - opus_ELRC-847-mst.dk/v1 #                                36,750 sentences
+  - opus_ELRC-730-www.norden.org/v1 #                        36,626 sentences
+  - opus_ELRC-www.norden.org/v1 #                            36,626 sentences
+  - opus_Tatoeba/v2023-04-12 #                               32,790 sentences
+  - opus_ELRC-850-www.dst.dk/v1 #                            22,817 sentences
+  - opus_ELRC-848-laegemiddelstyrelsen/v1 #                  22,700 sentences
+  - opus_ELRC-3567-EUR_LEX_covid/v1 #                        21,239 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   21,239 sentences
+  - opus_ELRC-2013-EUIPO_2017/v1 #                           17,269 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                17,269 sentences
+  - opus_ELRA-W0214/v1 #                                     16,243 sentences
+  - opus_ELRC-antibiotic/v1 #                                13,310 sentences
+  - opus_ELRC-2877-EU_publications_medi/v1 #                 13,243 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,243 sentences
+  - opus_ELRC-851-www.vikingeskibsmuse/v1 #                  12,404 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            11,723 sentences
+  - opus_ELRC-849-ufm.dk/v1 #                                10,054 sentences
+  - opus_EUconst/v1 #                                        10,032 sentences
+  - opus_ELRC-1062-rigsrevisionen.dk/v1 #                     8,234 sentences
+  - opus_GlobalVoices/v2018q4 #                               7,311 sentences
+  - opus_ELRC-904-uk.fm.dk/v1 #                               6,949 sentences
+  - opus_ELRC-3608-presscorner_covid/v1 #                     6,262 sentences
+  - opus_ELRC-892-slks.dk/v1 #                                4,956 sentences
+  - opus_ELRC-885-www.aarhus2017.dk/v1 #                      4,709 sentences
+  - opus_TildeMODEL/v2018 #                                   4,420 sentences
+  - opus_ELRC-397-Danish_Higher_Educat/v1 #                   4,395 sentences
+  - opus_ELRA-W0157/v1 #                                      4,394 sentences
+  - opus_ELRC-439-Danish_Higher_Educat/v1 #                   4,149 sentences
+  - opus_ELRC-893-natmus.dk/v1 #                              3,950 sentences
+  - opus_ELRC-394-Danish_Higher_Educat/v1 #                   3,719 sentences
+  - opus_ELRC_3382/v1 #                                       3,406 sentences
+  - opus_ELRC-905-spillemyndigheden.dk/v1 #                   3,355 sentences
+  - opus_ELRC-856-naturstyrelsen.dk/v1 #                      3,118 sentences
+  - opus_ELRC-859-um.dk/v1 #                                  3,055 sentences
+  - opus_ELRC-857-www.trm.dk/v1 #                             3,015 sentences
+  - opus_ELRC-852-www.dma.dk/v1 #                             3,010 sentences
+  - opus_ELRC-3466-EC_EUROPA_covid/v1 #                       2,804 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,804 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,578 sentences
+  - opus_ELRC-897-Denmark_Space_Instit/v1 #                   1,940 sentences
+  - opus_ELRC-899-Danish_FSA/v1 #                             1,931 sentences
+  - opus_ELRC-426-Danish_Higher_Educat/v1 #                   1,886 sentences
+  - opus_ELRC-854-www.geus.dk/v1 #                            1,655 sentences
+  - opus_ELRC-891-www.visitdenmark.dk/v1 #                    1,603 sentences
+  - opus_ELRC-895-Royal_Danish_Library/v1 #                   1,547 sentences
+  - opus_ELRC-889-www.visitvejle.com/v1 #                     1,472 sentences
+  - opus_ELRC-886-www.odense.dk/v1 #                          1,427 sentences
+  - opus_ELRC-901-Denmark_Prosecution_/v1 #                   1,163 sentences
+  - opus_ELRC-900-Danish_Working_Envir/v1 #                   1,138 sentences
+  - opus_ELRC-890-Holstebro_Kunstmuseu/v1 #                   1,023 sentences
+  - opus_ELRC-3204-antibiotic/v1 #                              801 sentences
+  - opus_ELRC-894-Gallery_Denmark/v1 #                          769 sentences
+  - opus_ELRC-3295-EUROPARL_covid/v1 #                          634 sentences
+  - opus_ELRC-3066-wikipedia_health/v1 #                        523 sentences
+  - opus_ELRC-wikipedia_health/v1 #                             523 sentences
+  - opus_ELRC_2922/v1 #                                         522 sentences
+  - opus_tldr-pages/v2023-08-29 #                               495 sentences
+  - opus_ELRC-2754-vaccination/v1 #                             462 sentences
+  - opus_ELRC-vaccination/v1 #                                  462 sentences
+  - opus_ELRC_2923/v1 #                                         389 sentences
+  - mtdata_ELRC-danish_higher_education_science_3-1-dan-eng
+  - mtdata_ELRC-danish_higher_education_science_2-1-dan-eng
+  - mtdata_ELRC-danish_higher_education_science-1-dan-eng
+  - mtdata_ELRC-danish_higher_education_science_4-1-dan-eng
+  - mtdata_ELRC-laegemiddelstyrelsen.dk-1-dan-eng
+  - mtdata_ELRC-www.vikingeskibsmuseet.dk-1-dan-eng
+  - mtdata_ELRC-holstebro_kunstmuseum-1-dan-eng
+  - mtdata_ELRC-denmark_space_institute-1-dan-eng
+  - mtdata_ELRC-danish_working_environment_authority-1-dan-eng
+  - mtdata_ELRC-denmark_prosecution_service-1-dan-eng
+  - mtdata_ELRC-eu_publications_medical_v2-1-dan-eng
+  - mtdata_ELRC-nteu_tierb-1-dan-eng
+  - mtdata_EU-eac_forms-1-dan-eng #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-dan-eng #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-dan-eng #                          ~1,040,518 sentences (117.6 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-dan #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Tilde-eesc-2017-dan-eng #                    ~1,936,973 sentences (218.9 MB)
+  - mtdata_Tilde-ema-2016-dan-eng #                       ~215,232 sentences (24.3 MB)
+  - mtdata_Tilde-rapid-2016-dan-eng #                     ~451,067 sentences (51.0 MB)
+
+  # The source-language (da) monolingual data contains:
+  #   ~0 sentences (no datasets are listed)
+  mono-src: []
+
+  # The target-language (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/el-en-spring-2024.yml b/configs/el-en-spring-2024.yml
new file mode 100644
index 000000000..7a416e140
--- /dev/null
+++ b/configs/el-en-spring-2024.yml
@@ -0,0 +1,260 @@
+# The initial configuration was generated using:
+# task config-generator -- el en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: el
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-ell
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_mtedx/test
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_mtedx/valid
+
+  # The parallel training data (train) contains:
+  #   159,976,981 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (49,262,631 sentences)
+  #  - opus_MultiMaCoCu/v2 - ignored datasets (3,583,975 sentences)
+  #  - opus_ELRC-3857-State_Budget_Executi/v1 - not enough data  (180 sentences)
+  #  - opus_ELRC-745-Convention_against_T/v1 - not enough data  (165 sentences)
+  #  - opus_ELRA-W0309/v1 - not enough data  (164 sentences)
+  #  - opus_ELRC-656-Macroeconomic_Develo/v1 - not enough data  (151 sentences)
+  #  - opus_ELRC-496-Convention_transfer_/v1 - not enough data  (121 sentences)
+  #  - opus_ELRA-W0196/v1 - not enough data  (120 sentences)
+  #  - opus_ELRA-W0207/v1 - not enough data  (101 sentences)
+  #  - opus_ELRA-W0308/v1 - not enough data  (87 sentences)
+  #  - opus_ELRC-662-Expression_interest/v1 - not enough data  (85 sentences)
+  #  - opus_ELRA-W0209/v1 - not enough data  (84 sentences)
+  #  - opus_ELRC-648-Letter_rights_person/v1 - not enough data  (65 sentences)
+  #  - opus_ELRC-658-Methodological_Recon/v1 - not enough data  (45 sentences)
+  #  - opus_ELRA-W0208/v1 - not enough data  (44 sentences)
+  #  - opus_ELRC-1022-COMPULSORY_EXPROPRIA/v1 - not enough data  (38 sentences)
+  #  - opus_ELRC-3856-PRESS/v1 - not enough data  (35 sentences)
+  #  - opus_ELRC-416-Swedish_Social_Secur/v1 - not enough data  (30 sentences)
+  #  - opus_ELRC-416-Swedish_Social_Secur/v1 - not enough data  (29 sentences)
+  #  - opus_ELRC-1021-Commitment_Property_/v1 - not enough data  (23 sentences)
+  #  - opus_ELRC-403-Rights_Arrested/v1 - not enough data  (22 sentences)
+  #  - opus_ELRA-W0301/v1 - not enough data  (16 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-greek_administration-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-greek_law-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-press_releases_pio-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-constitution_greece-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-emea-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-ell-eng - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-ell - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-ell-eng - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-ell-eng - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-ell - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-ell - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-ell - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-ell - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-ell - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-ell-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-ell_GR-eng - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-ell-eng - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       49,262,631 sentences
+  - opus_OpenSubtitles/v2018 #                           40,492,942 sentences
+  - opus_ParaCrawl/v9 #                                  21,402,471 sentences
+  - opus_ELRC-EMEA/v1 #                                  13,691,653 sentences
+  - opus_CCAligned/v1 #                                   8,878,509 sentences
+  - opus_DGT/v2019 #                                      5,099,790 sentences
+  - opus_EUbookshop/v2 #                                  4,022,952 sentences
+  - opus_MaCoCu/v2 #                                      3,583,978 sentences
+  - opus_XLEnt/v1.2 #                                     2,949,219 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   1,850,804 sentences
+  - opus_Europarl/v8 #                                    1,292,180 sentences
+  - opus_EMEA/v3 #                                        1,073,225 sentences
+  - opus_ELRC-2711-EMEA/v1 #                                781,988 sentences
+  - opus_ELRC_2682/v1 #                                     781,987 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              742,987 sentences
+  - opus_WikiMatrix/v1 #                                    620,802 sentences
+  - opus_wikimedia/v20230407 #                              589,733 sentences
+  - opus_QED/v2.0a #                                        550,438 sentences
+  - opus_ELITR-ECA/v1 #                                     381,561 sentences
+  - opus_TED2020/v1 #                                       269,407 sentences
+  - opus_SETIMES/v2 #                                       227,168 sentences
+  - opus_NeuLab-TedTalks/v1 #                               153,493 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        152,003 sentences
+  - opus_KDE4/v2 #                                          144,894 sentences
+  - opus_GlobalVoices/v2018q4 #                             120,421 sentences
+  - opus_ELRC-Press_Releases/v1 #                           117,171 sentences
+  - opus_Wikipedia/v1.0 #                                   104,076 sentences
+  - opus_ECB/v1 #                                           102,986 sentences
+  - opus_bible-uedin/v1 #                                    62,195 sentences
+  - opus_ELRA-W0202/v1 #                                     61,967 sentences
+  - opus_Tatoeba/v2023-04-12 #                               25,995 sentences
+  - opus_ELRC-3562-EUR_LEX_covid/v1 #                        23,024 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   23,024 sentences
+  - opus_ELRC-842-Quarterly_Reports_Pa/v1 #                  21,248 sentences
+  - opus_ELRA-W0243/v1 #                                     21,247 sentences
+  - opus_ELRC-1175-EUIPO_2017/v1 #                           20,027 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                20,027 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            19,587 sentences
+  - opus_JRC-Acquis/v3.0 #                                   17,717 sentences
+  - opus_GNOME/v1 #                                          17,389 sentences
+  - opus_ELRC-843-collection_reports_G/v1 #                  16,286 sentences
+  - opus_ELRA-W0244/v1 #                                     16,285 sentences
+  - opus_ELRC-antibiotic/v1 #                                16,083 sentences
+  - opus_ELRC-2872-EU_publications_medi/v1 #                 13,092 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,092 sentences
+  - opus_ELRC-649-Greek_administration/v1 #                  12,510 sentences
+  - opus_ELRA-W0203/v1 #                                     12,509 sentences
+  - opus_EUconst/v1 #                                         9,990 sentences
+  - opus_SPC/v1 #                                             8,181 sentences
+  - opus_ELRC-3603-presscorner_covid/v1 #                     6,635 sentences
+  - opus_ELRC-936-Prime_Minister_Helle/v1 #                   5,323 sentences
+  - opus_ELRA-W0272/v1 #                                      5,322 sentences
+  - opus_TildeMODEL/v2018 #                                   5,238 sentences
+  - opus_ELRC-1787-Press_Releases_PIO/v1 #                    5,163 sentences
+  - opus_ELRC-PIO_Publication/v1 #                            3,949 sentences
+  - opus_ELRC-1984-Hellenic_Gaming_Comm/v1 #                  3,875 sentences
+  - opus_ELRC_3382/v1 #                                       3,818 sentences
+  - opus_ELRC-932-Hellenic_Foreign_Aff/v1 #                   3,471 sentences
+  - opus_ELRA-W0271/v1 #                                      3,470 sentences
+  - opus_ELRC-1067-PIO_Publication_Wind/v1 #                  2,629 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,531 sentences
+  - opus_ELRC-3461-EC_EUROPA_covid/v1 #                       2,234 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,234 sentences
+  - opus_ELRC-1981-EQF_Referencing_Repo/v1 #                  2,100 sentences
+  - opus_ELRC-652-Greek_law/v1 #                              1,980 sentences
+  - opus_ELRA-W0205/v1 #                                      1,979 sentences
+  - opus_ELRC-3058-wikipedia_health/v1 #                      1,871 sentences
+  - opus_ELRC-wikipedia_health/v1 #                           1,871 sentences
+  - opus_ELRC-1020-collection_about_Cyp/v1 #                  1,870 sentences
+  - opus_ELRC_2922/v1 #                                       1,870 sentences
+  - opus_ELRC-1070-Press_Information_Cy/v1 #                  1,863 sentences
+  - opus_ELRC-1970-governmental_about_M/v1 #                  1,435 sentences
+  - opus_ELRC-1065-PIO_Publication_Cypr/v1 #                  1,321 sentences
+  - opus_Books/v1 #                                           1,285 sentences
+  - opus_ELRC-419-Greek_legislation_An/v1 #                   1,070 sentences
+  - opus_ELRA-W0164/v1 #                                      1,069 sentences
+  - opus_ELRC-1986-Constitution_Greece/v1 #                   1,022 sentences
+  - opus_ELRC-3199-antibiotic/v1 #                              990 sentences
+  - opus_ELRC-5160-Press_Releases_PIO/v1 #                      955 sentences
+  - opus_ELRC-3290-EUROPARL_covid/v1 #                          692 sentences
+  - opus_ELRC-2735-vaccination/v1 #                             519 sentences
+  - opus_ELRC-vaccination/v1 #                                  519 sentences
+  - opus_ELRC-663-Memorandum_a_ESM/v1 #                         434 sentences
+  - opus_ELRA-W0210/v1 #                                        433 sentences
+  - opus_ELRC_2923/v1 #                                         420 sentences
+  - opus_ELRC-646-International_Judici/v1 #                     289 sentences
+  - opus_ELRA-W0307/v1 #                                        288 sentences
+  - mtdata_ELRC-rights_arrested-1-ell-eng
+  - mtdata_ELRC-swedish_social_security-1-ell-eng
+  - mtdata_ELRC-greek_legislation_anticorruption_plan-1-ell-eng
+  - mtdata_ELRC-convention_transfer_sentenced_persons-1-ell-eng
+  - mtdata_ELRC-international_judicial_cooperation_civil_matters-1-ell-eng
+  - mtdata_ELRC-letter_rights_persons_arrested_or_detained-1-ell-eng
+  - mtdata_ELRC-macroeconomic_developments-1-ell-eng
+  - mtdata_ELRC-methodological_reconciliation-1-ell-eng
+  - mtdata_ELRC-expression_interest-1-ell-eng
+  - mtdata_ELRC-memorandum_a_esm_programme-1-ell-eng
+  - mtdata_ELRC-convention_against_torture_other_cruel_inhuman_or_degrading_treatment_or_punishment_united_nations-1-ell-eng
+  - mtdata_ELRC-quarterly_reports_parliamentary_budget-1-ell-eng
+  - mtdata_ELRC-collection_reports_greek_power_corporation-1-ell-eng
+  - mtdata_ELRC-hellenic_foreign_affairs_announcements-1-ell-eng
+  - mtdata_ELRC-prime_minister_hellenic-1-ell-eng
+  - mtdata_ELRC-collection_about_cyprus_problem-1-ell-eng
+  - mtdata_ELRC-commitment_property_open-1-ell-eng
+  - mtdata_ELRC-compulsory_expropriation_process_greece-1-ell-eng
+  - mtdata_ELRC-pio_publication_cyprus_has_always_been_europe_2017-1-ell-eng
+  - mtdata_ELRC-pio_publication_window_cyprus-1-ell-eng
+  - mtdata_ELRC-press_information_cyprus-1-ell-eng
+  - mtdata_ELRC-governmental_about_migration_policy-1-ell-eng
+  - mtdata_ELRC-eqf_referencing_report-1-ell-eng
+  - mtdata_ELRC-hellenic_gaming_commission-1-ell-eng
+  - mtdata_ELRC-eu_publications_medical_v2-1-ell-eng
+  - mtdata_EU-eac_forms-1-ell-eng #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-ell-eng #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-ell-eng #                          ~1,178,828 sentences (133.2 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-ell #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Tilde-eesc-2017-ell-eng #                    ~2,393,754 sentences (270.5 MB)
+  - mtdata_Tilde-ema-2016-ell-eng #                       ~244,548 sentences (27.6 MB)
+  - mtdata_Tilde-rapid-2016-ell-eng #                     ~586,564 sentences (66.3 MB)
+
+  # The source-language monolingual data (mono-src) contains:
+  #   ~27,097,343 sentences
+  mono-src:
+  - news-crawl_news.2015  #          ~1,115,044 sentences (126M)
+  - news-crawl_news.2019 #          ~2,398,230 sentences (271M)
+  - news-crawl_news.2020 #          ~5,327,433 sentences (602M)
+  - news-crawl_news.2021 #          ~5,238,938 sentences (592M)
+  - news-crawl_news.2022 #          ~6,725,663 sentences (760M)
+  - news-crawl_news.2023 #          ~6,292,035 sentences (711M)
+
+  # The target-language monolingual data (mono-trg) contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-bs-spring-2024.yml b/configs/en-bs-spring-2024.yml
new file mode 100644
index 000000000..f9f99bde7
--- /dev/null
+++ b/configs/en-bs-spring-2024.yml
@@ -0,0 +1,118 @@
+# The initial configuration was generated using:
+# task config-generator -- en bs --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: en
+  trg: bs
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-bos
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The parallel training data (train) contains:
+  #   94,895,603 sentences
+  #
+  # Skipped datasets:
+  #  - opus_MultiHPLT/v1.1 - ignored datasets (240,013 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-bos-eng - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-bos-eng - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-bos - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-bos_BA-eng - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       79,334,034 sentences
+  - opus_OpenSubtitles/v2018 #                           14,041,160 sentences
+  - opus_XLEnt/v1.2 #                                       266,696 sentences
+  - opus_Tanzil/v1 #                                        246,913 sentences
+  - opus_HPLT/v1.1 #                                        240,015 sentences
+  - opus_WikiMatrix/v1 #                                    210,691 sentences
+  - opus_CCAligned/v1 #                                     192,099 sentences
+  - opus_GNOME/v1 #                                         164,960 sentences
+  - opus_SETIMES/v2 #                                       138,387 sentences
+  - opus_wikimedia/v20230407 #                               28,167 sentences
+  - opus_QED/v2.0a #                                         12,541 sentences
+  - opus_TED2020/v1 #                                        11,638 sentences
+  - opus_NeuLab-TedTalks/v1 #                                 6,136 sentences
+  - opus_EUbookshop/v2 #                                        558 sentences
+  - opus_Tatoeba/v2023-04-12 #                                  515 sentences
+  - opus_tldr-pages/v2023-08-29 #                               479 sentences
+  - opus_ELRC-3047-wikipedia_health/v1 #                        205 sentences
+  - opus_ELRC-wikipedia_health/v1 #                             205 sentences
+  - opus_ELRC_2922/v1 #                                         204 sentences
+  - mtdata_Neulab-tedtalks_test-1-eng-bos #             ~3,117,009 sentences (352.2 MB)
+
+  # The source-language monolingual data (mono-src) contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-language monolingual data (mono-trg) contains:
+  #   ~8,982,298 sentences
+  mono-trg:
+  - news-crawl_news.2018  #              ~8,849 sentences (1.0M)
+  - news-crawl_news.2019 #            ~920,353 sentences (104M)
+  - news-crawl_news.2020 #          ~1,734,513 sentences (196M)
+  - news-crawl_news.2021 #          ~2,079,646 sentences (235M)
+  - news-crawl_news.2022 #          ~2,132,743 sentences (241M)
+  - news-crawl_news.2023 #          ~2,106,194 sentences (238M)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-cs-spring-2024.yml b/configs/en-cs-spring-2024.yml
new file mode 100644
index 000000000..c71aaf91c
--- /dev/null
+++ b/configs/en-cs-spring-2024.yml
@@ -0,0 +1,234 @@
+# The initial configuration was generated using:
+# task config-generator -- en cs --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: en
+  trg: cs
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Lindat-khresmoi_summary_dev-2-ces-eng
+  - mtdata_Neulab-tedtalks_dev-1-eng-ces
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt19
+  - sacrebleu_aug-mix_wmt18/test-ts
+  - sacrebleu_aug-mix_wmt16
+  - sacrebleu_aug-mix_wmt14
+  - sacrebleu_aug-mix_wmt13
+  - sacrebleu_aug-mix_wmt11
+  - sacrebleu_aug-mix_wmt09
+  - sacrebleu_aug-mix_wmt08/nc
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt20
+  - sacrebleu_wmt18
+  - sacrebleu_wmt17
+  - sacrebleu_wmt15
+  - sacrebleu_wmt14/full
+  - sacrebleu_wmt12
+  - sacrebleu_wmt10
+  - sacrebleu_wmt08
+  - sacrebleu_multi30k/2016
+
+  # The parallel training data (train) contains:
+  #   213,550,488 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (56,307,029 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - opus_WikiTitles/v3 - ignored datasets (0 sentences)
+  #  - mtdata_ELRC-euipo_2017-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-emea-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-ces-eng - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-ces-eng - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-ces - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-ces-eng - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-ces-eng - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-ces - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-3-eng-ces - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-ces - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-ces - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-ces - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-ces - duplicate with opus
+  #  - mtdata_Statmt-europarl-9-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-14-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-15-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-16-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-europarl-10-ces-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-ces_CZ-eng - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-ces-eng - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       56,307,029 sentences
+  - opus_ParaCrawl/v9 #                                  50,633,505 sentences
+  - opus_OpenSubtitles/v2018 #                           42,346,436 sentences
+  - opus_StanfordNLP-NMT/v1.0 #                          15,793,121 sentences
+  - opus_ELRC-EMEA/v1 #                                  12,891,707 sentences
+  - opus_CCAligned/v1 #                                  12,730,121 sentences
+  - opus_DGT/v2019 #                                      5,207,753 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   4,813,030 sentences
+  - opus_XLEnt/v1.2 #                                     3,894,132 sentences
+  - opus_JRC-Acquis/v3.0 #                                1,273,411 sentences
+  - opus_ELRC-5067-SciPar/v1 #                            1,064,385 sentences
+  - opus_EMEA/v3 #                                        1,053,385 sentences
+  - opus_ELRC-2713-EMEA/v1 #                                779,083 sentences
+  - opus_ELRC_2682/v1 #                                     779,082 sentences
+  - opus_Europarl/v8 #                                      647,095 sentences
+  - opus_WikiMatrix/v1 #                                    519,195 sentences
+  - opus_EUbookshop/v2 #                                    455,472 sentences
+  - opus_QED/v2.0a #                                        441,508 sentences
+  - opus_ELITR-ECA/v1 #                                     295,788 sentences
+  - opus_Tanzil/v1 #                                        233,399 sentences
+  - opus_News-Commentary/v16 #                              218,509 sentences
+  - opus_TED2020/v1 #                                       170,611 sentences
+  - opus_wikimedia/v20230407 #                              146,717 sentences
+  - opus_KDE4/v2 #                                          134,071 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        129,652 sentences
+  - opus_NeuLab-TedTalks/v1 #                               111,107 sentences
+  - opus_ECB/v1 #                                            63,716 sentences
+  - opus_bible-uedin/v1 #                                    62,151 sentences
+  - opus_WMT-News/v2019 #                                    44,859 sentences
+  - opus_Tatoeba/v2023-04-12 #                               34,628 sentences
+  - opus_PHP/v1 #                                            32,983 sentences
+  - opus_Wikipedia/v1.0 #                                    27,723 sentences
+  - opus_ELRC-3564-EUR_LEX_covid/v1 #                        22,637 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,637 sentences
+  - opus_GlobalVoices/v2018q4 #                              18,876 sentences
+  - opus_ELRC-427-Electronic_Exchange_/v1 #                  17,357 sentences
+  - opus_ELRC-2012-EUIPO_2017/v1 #                           15,945 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                15,945 sentences
+  - opus_ELRC-antibiotic/v1 #                                15,678 sentences
+  - opus_ELRC-2874-EU_publications_medi/v1 #                 13,161 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,161 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            11,142 sentences
+  - opus_EUconst/v1 #                                         9,953 sentences
+  - opus_ELRC-3605-presscorner_covid/v1 #                     6,229 sentences
+  - opus_ELRC-2406-Czech_Supreme_Audit/v1 #                   4,771 sentences
+  - opus_ELRC_3382/v1 #                                       3,722 sentences
+  - opus_TildeMODEL/v2018 #                                   3,100 sentences
+  - opus_ELRC-2405-Czech_Supreme_Audit/v1 #                   2,868 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,559 sentences
+  - opus_ELRC-3463-EC_EUROPA_covid/v1 #                       2,386 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,386 sentences
+  - opus_ELRC-40-Information_Portal_C/v1 #                    1,828 sentences
+  - opus_ELRC-Information_Portal/v1 #                         1,828 sentences
+  - opus_ELRC-3062-wikipedia_health/v1 #                      1,146 sentences
+  - opus_ELRC-wikipedia_health/v1 #                           1,146 sentences
+  - opus_ELRC_2922/v1 #                                       1,145 sentences
+  - opus_ELRC-3201-antibiotic/v1 #                              965 sentences
+  - opus_ELRC-3292-EUROPARL_covid/v1 #                          557 sentences
+  - opus_ELRC-2749-vaccination/v1 #                             520 sentences
+  - opus_ELRC-vaccination/v1 #                                  520 sentences
+  - opus_ELRC-2404-Czech_Supreme_Audit/v1 #                     403 sentences
+  - opus_ELRC_2923/v1 #                                         319 sentences
+  - opus_ELRC-2407-Czech_Supreme_Audit/v1 #                     234 sentences
+  - mtdata_ELRC-information_portal_czech_president_czech_castle-1-ces-eng
+  - mtdata_ELRC-electronic_exchange_social_security_information-1-ces-eng
+  - mtdata_ELRC-czech_supreme_audit_office_2018_reports-1-ces-eng
+  - mtdata_ELRC-czech_supreme_audit_office_2008_2017_reports-1-ces-eng
+  - mtdata_ELRC-czech_supreme_audit_office_2003_2017_press_releases-1-ces-eng
+  - mtdata_ELRC-czech_supreme_audit_office_2018_press_releases-1-ces-eng
+  - mtdata_ELRC-eu_publications_medical_v2-1-ces-eng
+  - mtdata_EU-eac_forms-1-ces-eng #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-ces-eng #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-ces-eng #                            ~533,693 sentences (60.3 MB)
+  - mtdata_Lindat-khresmoi_summary_test-2-ces-eng #        ~11,808 sentences (1.3 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-ces #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Statmt-commoncrawl_wmt13-1-ces-eng #         ~8,126,649 sentences (918.3 MB)
+  - mtdata_Statmt-europarl_wmt13-7-ces-eng #            ~5,819,755 sentences (657.6 MB)
+  - mtdata_Statmt-news_commentary_wmt18-13-ces-eng #    ~1,001,393 sentences (113.2 MB)
+  - mtdata_Statmt-wiki_titles-1-ces-eng #                  ~45,242 sentences (5.1 MB)
+  - mtdata_Statmt-wiki_titles-2-ces-eng #                  ~47,995 sentences (5.4 MB)
+  - mtdata_Tilde-eesc-2017-ces-eng #                    ~1,157,475 sentences (130.8 MB)
+  - mtdata_Tilde-ema-2016-ces-eng #                       ~244,524 sentences (27.6 MB)
+  - mtdata_Tilde-rapid-2019-ces-eng #                     ~255,063 sentences (28.8 MB)
+
+  # The source-language monolingual data (mono-src) contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-language monolingual data (mono-trg) contains:
+  #   ~55,777,868 sentences
+  mono-trg:
+  - news-crawl_news.2007  #             ~34,513 sentences (3.9M)
+  - news-crawl_news.2008 #          ~1,840,707 sentences (208M)
+  - news-crawl_news.2009 #          ~2,079,646 sentences (235M)
+  - news-crawl_news.2010 #          ~1,247,787 sentences (141M)
+  - news-crawl_news.2011 #          ~3,185,840 sentences (360M)
+  - news-crawl_news.2012 #          ~2,964,601 sentences (335M)
+  - news-crawl_news.2013 #          ~3,389,380 sentences (383M)
+  - news-crawl_news.2014 #          ~2,973,451 sentences (336M)
+  - news-crawl_news.2015 #          ~3,026,548 sentences (342M)
+  - news-crawl_news.2016 #          ~2,159,292 sentences (244M)
+  - news-crawl_news.2017 #          ~2,849,557 sentences (322M)
+  - news-crawl_news.2018 #          ~2,637,168 sentences (298M)
+  - news-crawl_news.2019 #          ~5,513,274 sentences (623M)
+  - news-crawl_news.2020 #          ~7,451,327 sentences (842M)
+  - news-crawl_news.2021 #          ~5,265,486 sentences (595M)
+  - news-crawl_news.2022 #          ~3,884,955 sentences (439M)
+  - news-crawl_news.2023 #          ~5,274,336 sentences (596M)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-da-spring-2024.yml b/configs/en-da-spring-2024.yml
new file mode 100644
index 000000000..2861b50ec
--- /dev/null
+++ b/configs/en-da-spring-2024.yml
@@ -0,0 +1,235 @@
+# The initial configuration was generated using:
+# task config-generator -- en da --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: en
+  trg: da
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-dan
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   161,668,955 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (52,273,664 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-www.norden.org-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-mst.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-ufm.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.dst.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.dma.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.geus.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-naturstyrelsen.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.trm.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-um.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.aarhus2017.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.odense.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.visitvejle.com-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-www.visitdenmark.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-slks.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-natmus.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-gallery_denmark-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-royal_danish_library-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-danish_fsa-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-uk.fm.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-spillemyndigheden.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-rigsrevisionen.dk-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-emea-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-dan-eng - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-dan-eng - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-dan - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-dan-eng - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-dan-eng - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-dan - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-dan - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-dan - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-dan - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-dan - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-dan-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-dan_DK-eng - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-dan-eng - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       52,273,664 sentences
+  - opus_ParaCrawl/v9 #                                  34,207,840 sentences
+  - opus_OpenSubtitles/v2018 #                           14,474,569 sentences
+  - opus_ELRC-4248-NTEU_TierA/v1 #                       13,756,130 sentences
+  - opus_ELRC-EMEA/v1 #                                  12,556,334 sentences
+  - opus_CCAligned/v1 #                                  10,738,610 sentences
+  - opus_DGT/v2019 #                                      5,152,323 sentences
+  - opus_EUbookshop/v2 #                                  4,980,755 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   3,084,707 sentences
+  - opus_XLEnt/v1.2 #                                     3,042,401 sentences
+  - opus_Europarl/v8 #                                    1,991,647 sentences
+  - opus_EMEA/v3 #                                        1,093,780 sentences
+  - opus_JRC-Acquis/v3.0 #                                  808,916 sentences
+  - opus_ELRC-2716-EMEA/v1 #                                775,676 sentences
+  - opus_ELRC_2682/v1 #                                     775,675 sentences
+  - opus_WikiMatrix/v1 #                                    436,052 sentences
+  - opus_KDE4/v2 #                                          194,410 sentences
+  - opus_QED/v2.0a #                                        175,384 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        145,352 sentences
+  - opus_ECB/v1 #                                           138,154 sentences
+  - opus_ELITR-ECA/v1 #                                     135,384 sentences
+  - opus_TED2020/v1 #                                        72,113 sentences
+  - opus_wikimedia/v20230407 #                               69,969 sentences
+  - opus_bible-uedin/v1 #                                    62,113 sentences
+  - opus_NeuLab-TedTalks/v1 #                                48,462 sentences
+  - opus_ELRC-847-mst.dk/v1 #                                36,750 sentences
+  - opus_ELRC-730-www.norden.org/v1 #                        36,626 sentences
+  - opus_ELRC-www.norden.org/v1 #                            36,626 sentences
+  - opus_Tatoeba/v2023-04-12 #                               32,790 sentences
+  - opus_ELRC-850-www.dst.dk/v1 #                            22,817 sentences
+  - opus_ELRC-848-laegemiddelstyrelsen/v1 #                  22,700 sentences
+  - opus_ELRC-3567-EUR_LEX_covid/v1 #                        21,239 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   21,239 sentences
+  - opus_ELRC-2013-EUIPO_2017/v1 #                           17,269 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                17,269 sentences
+  - opus_ELRA-W0214/v1 #                                     16,243 sentences
+  - opus_ELRC-antibiotic/v1 #                                13,310 sentences
+  - opus_ELRC-2877-EU_publications_medi/v1 #                 13,243 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,243 sentences
+  - opus_ELRC-851-www.vikingeskibsmuse/v1 #                  12,404 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            11,723 sentences
+  - opus_ELRC-849-ufm.dk/v1 #                                10,054 sentences
+  - opus_EUconst/v1 #                                        10,032 sentences
+  - opus_ELRC-1062-rigsrevisionen.dk/v1 #                     8,234 sentences
+  - opus_GlobalVoices/v2018q4 #                               7,311 sentences
+  - opus_ELRC-904-uk.fm.dk/v1 #                               6,949 sentences
+  - opus_ELRC-3608-presscorner_covid/v1 #                     6,262 sentences
+  - opus_ELRC-892-slks.dk/v1 #                                4,956 sentences
+  - opus_ELRC-885-www.aarhus2017.dk/v1 #                      4,709 sentences
+  - opus_TildeMODEL/v2018 #                                   4,420 sentences
+  - opus_ELRC-397-Danish_Higher_Educat/v1 #                   4,395 sentences
+  - opus_ELRA-W0157/v1 #                                      4,394 sentences
+  - opus_ELRC-439-Danish_Higher_Educat/v1 #                   4,149 sentences
+  - opus_ELRC-893-natmus.dk/v1 #                              3,950 sentences
+  - opus_ELRC-394-Danish_Higher_Educat/v1 #                   3,719 sentences
+  - opus_ELRC_3382/v1 #                                       3,406 sentences
+  - opus_ELRC-905-spillemyndigheden.dk/v1 #                   3,355 sentences
+  - opus_ELRC-856-naturstyrelsen.dk/v1 #                      3,118 sentences
+  - opus_ELRC-859-um.dk/v1 #                                  3,055 sentences
+  - opus_ELRC-857-www.trm.dk/v1 #                             3,015 sentences
+  - opus_ELRC-852-www.dma.dk/v1 #                             3,010 sentences
+  - opus_ELRC-3466-EC_EUROPA_covid/v1 #                       2,804 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,804 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,578 sentences
+  - opus_ELRC-897-Denmark_Space_Instit/v1 #                   1,940 sentences
+  - opus_ELRC-899-Danish_FSA/v1 #                             1,931 sentences
+  - opus_ELRC-426-Danish_Higher_Educat/v1 #                   1,886 sentences
+  - opus_ELRC-854-www.geus.dk/v1 #                            1,655 sentences
+  - opus_ELRC-891-www.visitdenmark.dk/v1 #                    1,603 sentences
+  - opus_ELRC-895-Royal_Danish_Library/v1 #                   1,547 sentences
+  - opus_ELRC-889-www.visitvejle.com/v1 #                     1,472 sentences
+  - opus_ELRC-886-www.odense.dk/v1 #                          1,427 sentences
+  - opus_ELRC-901-Denmark_Prosecution_/v1 #                   1,163 sentences
+  - opus_ELRC-900-Danish_Working_Envir/v1 #                   1,138 sentences
+  - opus_ELRC-890-Holstebro_Kunstmuseu/v1 #                   1,023 sentences
+  - opus_ELRC-3204-antibiotic/v1 #                              801 sentences
+  - opus_ELRC-894-Gallery_Denmark/v1 #                          769 sentences
+  - opus_ELRC-3295-EUROPARL_covid/v1 #                          634 sentences
+  - opus_ELRC-3066-wikipedia_health/v1 #                        523 sentences
+  - opus_ELRC-wikipedia_health/v1 #                             523 sentences
+  - opus_ELRC_2922/v1 #                                         522 sentences
+  - opus_tldr-pages/v2023-08-29 #                               495 sentences
+  - opus_ELRC-2754-vaccination/v1 #                             462 sentences
+  - opus_ELRC-vaccination/v1 #                                  462 sentences
+  - opus_ELRC_2923/v1 #                                         389 sentences
+  - mtdata_ELRC-danish_higher_education_science_3-1-dan-eng
+  - mtdata_ELRC-danish_higher_education_science_2-1-dan-eng
+  - mtdata_ELRC-danish_higher_education_science-1-dan-eng
+  - mtdata_ELRC-danish_higher_education_science_4-1-dan-eng
+  - mtdata_ELRC-laegemiddelstyrelsen.dk-1-dan-eng
+  - mtdata_ELRC-www.vikingeskibsmuseet.dk-1-dan-eng
+  - mtdata_ELRC-holstebro_kunstmuseum-1-dan-eng
+  - mtdata_ELRC-denmark_space_institute-1-dan-eng
+  - mtdata_ELRC-danish_working_environment_authority-1-dan-eng
+  - mtdata_ELRC-denmark_prosecution_service-1-dan-eng
+  - mtdata_ELRC-eu_publications_medical_v2-1-dan-eng
+  - mtdata_ELRC-nteu_tierb-1-dan-eng
+  - mtdata_EU-eac_forms-1-dan-eng #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-dan-eng #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-dan-eng #                          ~1,040,518 sentences (117.6 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-dan #             ~3,117,009 sentences (352.2 MB); NOTE(review): "test" split listed under train, estimate looks off — confirm
+  - mtdata_Tilde-eesc-2017-dan-eng #                    ~1,936,973 sentences (218.9 MB)
+  - mtdata_Tilde-ema-2016-dan-eng #                       ~215,232 sentences (24.3 MB)
+  - mtdata_Tilde-rapid-2016-dan-eng #                     ~451,067 sentences (51.0 MB)
+
+  # The source (en) monolingual data (mono-src) contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target (da) monolingual data (mono-trg) contains:
+  #   ~0 sentences
+  mono-trg: []
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-el-spring-2024.yml b/configs/en-el-spring-2024.yml
new file mode 100644
index 000000000..c5ba9165e
--- /dev/null
+++ b/configs/en-el-spring-2024.yml
@@ -0,0 +1,260 @@
+# The initial configuration was generated using:
+# task config-generator -- en el --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: en
+  trg: el
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-ell
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_mtedx/test
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_mtedx/valid
+
+  # The training data contains:
+  #   159,976,981 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (49,262,631 sentences)
+  #  - opus_MultiMaCoCu/v2 - ignored datasets (3,583,975 sentences)
+  #  - opus_ELRC-3857-State_Budget_Executi/v1 - not enough data  (180 sentences)
+  #  - opus_ELRC-745-Convention_against_T/v1 - not enough data  (165 sentences)
+  #  - opus_ELRA-W0309/v1 - not enough data  (164 sentences)
+  #  - opus_ELRC-656-Macroeconomic_Develo/v1 - not enough data  (151 sentences)
+  #  - opus_ELRC-496-Convention_transfer_/v1 - not enough data  (121 sentences)
+  #  - opus_ELRA-W0196/v1 - not enough data  (120 sentences)
+  #  - opus_ELRA-W0207/v1 - not enough data  (101 sentences)
+  #  - opus_ELRA-W0308/v1 - not enough data  (87 sentences)
+  #  - opus_ELRC-662-Expression_interest/v1 - not enough data  (85 sentences)
+  #  - opus_ELRA-W0209/v1 - not enough data  (84 sentences)
+  #  - opus_ELRC-648-Letter_rights_person/v1 - not enough data  (65 sentences)
+  #  - opus_ELRC-658-Methodological_Recon/v1 - not enough data  (45 sentences)
+  #  - opus_ELRA-W0208/v1 - not enough data  (44 sentences)
+  #  - opus_ELRC-1022-COMPULSORY_EXPROPRIA/v1 - not enough data  (38 sentences)
+  #  - opus_ELRC-3856-PRESS/v1 - not enough data  (35 sentences)
+  #  - opus_ELRC-416-Swedish_Social_Secur/v1 - not enough data  (30 sentences)
+  #  - opus_ELRC-416-Swedish_Social_Secur/v1 - not enough data  (29 sentences)
+  #  - opus_ELRC-1021-Commitment_Property_/v1 - not enough data  (23 sentences)
+  #  - opus_ELRC-403-Rights_Arrested/v1 - not enough data  (22 sentences)
+  #  - opus_ELRA-W0301/v1 - not enough data  (16 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-greek_administration-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-greek_law-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-press_releases_pio-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-constitution_greece-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-emea-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-ell-eng - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-ell-eng - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-ell - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-ell-eng - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-ell-eng - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-ell - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-ell - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-ell - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-ell - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-ell - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-ell-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-ell_GR-eng - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-ell-eng - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       49,262,631 sentences
+  - opus_OpenSubtitles/v2018 #                           40,492,942 sentences
+  - opus_ParaCrawl/v9 #                                  21,402,471 sentences
+  - opus_ELRC-EMEA/v1 #                                  13,691,653 sentences
+  - opus_CCAligned/v1 #                                   8,878,509 sentences
+  - opus_DGT/v2019 #                                      5,099,790 sentences
+  - opus_EUbookshop/v2 #                                  4,022,952 sentences
+  - opus_MaCoCu/v2 #                                      3,583,978 sentences
+  - opus_XLEnt/v1.2 #                                     2,949,219 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   1,850,804 sentences
+  - opus_Europarl/v8 #                                    1,292,180 sentences
+  - opus_EMEA/v3 #                                        1,073,225 sentences
+  - opus_ELRC-2711-EMEA/v1 #                                781,988 sentences
+  - opus_ELRC_2682/v1 #                                     781,987 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              742,987 sentences
+  - opus_WikiMatrix/v1 #                                    620,802 sentences
+  - opus_wikimedia/v20230407 #                              589,733 sentences
+  - opus_QED/v2.0a #                                        550,438 sentences
+  - opus_ELITR-ECA/v1 #                                     381,561 sentences
+  - opus_TED2020/v1 #                                       269,407 sentences
+  - opus_SETIMES/v2 #                                       227,168 sentences
+  - opus_NeuLab-TedTalks/v1 #                               153,493 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        152,003 sentences
+  - opus_KDE4/v2 #                                          144,894 sentences
+  - opus_GlobalVoices/v2018q4 #                             120,421 sentences
+  - opus_ELRC-Press_Releases/v1 #                           117,171 sentences
+  - opus_Wikipedia/v1.0 #                                   104,076 sentences
+  - opus_ECB/v1 #                                           102,986 sentences
+  - opus_bible-uedin/v1 #                                    62,195 sentences
+  - opus_ELRA-W0202/v1 #                                     61,967 sentences
+  - opus_Tatoeba/v2023-04-12 #                               25,995 sentences
+  - opus_ELRC-3562-EUR_LEX_covid/v1 #                        23,024 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   23,024 sentences
+  - opus_ELRC-842-Quarterly_Reports_Pa/v1 #                  21,248 sentences
+  - opus_ELRA-W0243/v1 #                                     21,247 sentences
+  - opus_ELRC-1175-EUIPO_2017/v1 #                           20,027 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                20,027 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            19,587 sentences
+  - opus_JRC-Acquis/v3.0 #                                   17,717 sentences
+  - opus_GNOME/v1 #                                          17,389 sentences
+  - opus_ELRC-843-collection_reports_G/v1 #                  16,286 sentences
+  - opus_ELRA-W0244/v1 #                                     16,285 sentences
+  - opus_ELRC-antibiotic/v1 #                                16,083 sentences
+  - opus_ELRC-2872-EU_publications_medi/v1 #                 13,092 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,092 sentences
+  - opus_ELRC-649-Greek_administration/v1 #                  12,510 sentences
+  - opus_ELRA-W0203/v1 #                                     12,509 sentences
+  - opus_EUconst/v1 #                                         9,990 sentences
+  - opus_SPC/v1 #                                             8,181 sentences
+  - opus_ELRC-3603-presscorner_covid/v1 #                     6,635 sentences
+  - opus_ELRC-936-Prime_Minister_Helle/v1 #                   5,323 sentences
+  - opus_ELRA-W0272/v1 #                                      5,322 sentences
+  - opus_TildeMODEL/v2018 #                                   5,238 sentences
+  - opus_ELRC-1787-Press_Releases_PIO/v1 #                    5,163 sentences
+  - opus_ELRC-PIO_Publication/v1 #                            3,949 sentences
+  - opus_ELRC-1984-Hellenic_Gaming_Comm/v1 #                  3,875 sentences
+  - opus_ELRC_3382/v1 #                                       3,818 sentences
+  - opus_ELRC-932-Hellenic_Foreign_Aff/v1 #                   3,471 sentences
+  - opus_ELRA-W0271/v1 #                                      3,470 sentences
+  - opus_ELRC-1067-PIO_Publication_Wind/v1 #                  2,629 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,531 sentences
+  - opus_ELRC-3461-EC_EUROPA_covid/v1 #                       2,234 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,234 sentences
+  - opus_ELRC-1981-EQF_Referencing_Repo/v1 #                  2,100 sentences
+  - opus_ELRC-652-Greek_law/v1 #                              1,980 sentences
+  - opus_ELRA-W0205/v1 #                                      1,979 sentences
+  - opus_ELRC-3058-wikipedia_health/v1 #                      1,871 sentences
+  - opus_ELRC-wikipedia_health/v1 #                           1,871 sentences
+  - opus_ELRC-1020-collection_about_Cyp/v1 #                  1,870 sentences
+  - opus_ELRC_2922/v1 #                                       1,870 sentences
+  - opus_ELRC-1070-Press_Information_Cy/v1 #                  1,863 sentences
+  - opus_ELRC-1970-governmental_about_M/v1 #                  1,435 sentences
+  - opus_ELRC-1065-PIO_Publication_Cypr/v1 #                  1,321 sentences
+  - opus_Books/v1 #                                           1,285 sentences
+  - opus_ELRC-419-Greek_legislation_An/v1 #                   1,070 sentences
+  - opus_ELRA-W0164/v1 #                                      1,069 sentences
+  - opus_ELRC-1986-Constitution_Greece/v1 #                   1,022 sentences
+  - opus_ELRC-3199-antibiotic/v1 #                              990 sentences
+  - opus_ELRC-5160-Press_Releases_PIO/v1 #                      955 sentences
+  - opus_ELRC-3290-EUROPARL_covid/v1 #                          692 sentences
+  - opus_ELRC-2735-vaccination/v1 #                             519 sentences
+  - opus_ELRC-vaccination/v1 #                                  519 sentences
+  - opus_ELRC-663-Memorandum_a_ESM/v1 #                         434 sentences
+  - opus_ELRA-W0210/v1 #                                        433 sentences
+  - opus_ELRC_2923/v1 #                                         420 sentences
+  - opus_ELRC-646-International_Judici/v1 #                     289 sentences
+  - opus_ELRA-W0307/v1 #                                        288 sentences
+  - mtdata_ELRC-rights_arrested-1-ell-eng
+  - mtdata_ELRC-swedish_social_security-1-ell-eng
+  - mtdata_ELRC-greek_legislation_anticorruption_plan-1-ell-eng
+  - mtdata_ELRC-convention_transfer_sentenced_persons-1-ell-eng
+  - mtdata_ELRC-international_judicial_cooperation_civil_matters-1-ell-eng
+  - mtdata_ELRC-letter_rights_persons_arrested_or_detained-1-ell-eng
+  - mtdata_ELRC-macroeconomic_developments-1-ell-eng
+  - mtdata_ELRC-methodological_reconciliation-1-ell-eng
+  - mtdata_ELRC-expression_interest-1-ell-eng
+  - mtdata_ELRC-memorandum_a_esm_programme-1-ell-eng
+  - mtdata_ELRC-convention_against_torture_other_cruel_inhuman_or_degrading_treatment_or_punishment_united_nations-1-ell-eng
+  - mtdata_ELRC-quarterly_reports_parliamentary_budget-1-ell-eng
+  - mtdata_ELRC-collection_reports_greek_power_corporation-1-ell-eng
+  - mtdata_ELRC-hellenic_foreign_affairs_announcements-1-ell-eng
+  - mtdata_ELRC-prime_minister_hellenic-1-ell-eng
+  - mtdata_ELRC-collection_about_cyprus_problem-1-ell-eng
+  - mtdata_ELRC-commitment_property_open-1-ell-eng
+  - mtdata_ELRC-compulsory_expropriation_process_greece-1-ell-eng
+  - mtdata_ELRC-pio_publication_cyprus_has_always_been_europe_2017-1-ell-eng
+  - mtdata_ELRC-pio_publication_window_cyprus-1-ell-eng
+  - mtdata_ELRC-press_information_cyprus-1-ell-eng
+  - mtdata_ELRC-governmental_about_migration_policy-1-ell-eng
+  - mtdata_ELRC-eqf_referencing_report-1-ell-eng
+  - mtdata_ELRC-hellenic_gaming_commission-1-ell-eng
+  - mtdata_ELRC-eu_publications_medical_v2-1-ell-eng
+  - mtdata_EU-eac_forms-1-ell-eng #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-ell-eng #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-ell-eng #                          ~1,178,828 sentences (133.2 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-ell #             ~3,117,009 sentences (352.2 MB); NOTE(review): "test" split listed under train, estimate looks off — confirm
+  - mtdata_Tilde-eesc-2017-ell-eng #                    ~2,393,754 sentences (270.5 MB)
+  - mtdata_Tilde-ema-2016-ell-eng #                       ~244,548 sentences (27.6 MB)
+  - mtdata_Tilde-rapid-2016-ell-eng #                     ~586,564 sentences (66.3 MB)
+
+  # The source (en) monolingual data (mono-src) contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target (el) monolingual data (mono-trg) contains:
+  #   ~27,097,343 sentences
+  mono-trg:
+  - news-crawl_news.2015  #          ~1,115,044 sentences (126M)
+  - news-crawl_news.2019 #          ~2,398,230 sentences (271M)
+  - news-crawl_news.2020 #          ~5,327,433 sentences (602M)
+  - news-crawl_news.2021 #          ~5,238,938 sentences (592M)
+  - news-crawl_news.2022 #          ~6,725,663 sentences (760M)
+  - news-crawl_news.2023 #          ~6,292,035 sentences (711M)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-fi-spring-2024.yml b/configs/en-fi-spring-2024.yml
new file mode 100644
index 000000000..6d0d3c8a7
--- /dev/null
+++ b/configs/en-fi-spring-2024.yml
@@ -0,0 +1,242 @@
+# The initial configuration was generated using:
+# task config-generator -- en fi --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024  # same value as the --name argument of the generator command above
+  src: en  # source language; first positional argument of the generator command
+  trg: fi  # target language; second positional argument of the generator command
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted, so this loads as the string 'true', not a boolean
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping: no per-dataset thresholds are set
+  mono-max-sentences-src: 500_000_000  # underscores are digit grouping; YAML 1.1 loaders read the integer 500000000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2  # NOTE(review): presumably the number of teacher models trained - confirm
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-fin
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt18
+  - sacrebleu_aug-mix_wmt17
+  - sacrebleu_aug-mix_wmt17/tworefs
+  - sacrebleu_aug-mix_wmt16/B
+  - sacrebleu_aug-mix_wmt15
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt19
+  - sacrebleu_wmt18/test-ts
+  - sacrebleu_wmt17/B
+  - sacrebleu_wmt16
+  - sacrebleu_wmt16/tworefs
+
+  # The training data contains:
+  #   180,578,066 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (35,982,562 sentences)
+  #  - opus_ELRC-401-Swedish_Labour_Part2/v1 - not enough data  (171 sentences)
+  #  - opus_ELRC-406-Swedish_Labour_Part1/v1 - not enough data  (41 sentences)
+  #  - opus_ELRC-436-Swedish_Food/v1 - not enough data  (16 sentences)
+  #  - opus_ELRA-W0305/v1 - not enough data  (15 sentences)
+  #  - opus_MultiHPLT/v1.1 - ignored datasets (0 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-hallituskausi_2007_2011-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-hallituskausi_2011_2015-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-www.norden.org-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-www.vtv.fi-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-www.visitestonia.com-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-valtioneuvosto.fi-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-vnk.fi-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-www.turku.fi-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-www.vero.fi-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-fin - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-eng-fin - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-fin - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-fin - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-eng-fin - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-fin - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-3-eng-fin - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-fin - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-fin - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-fin - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-fin - duplicate with opus
+  #  - mtdata_Statmt-europarl-9-fin-eng - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-fin-eng - duplicate with opus
+  #  - mtdata_Statmt-europarl-10-fin-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-fin_FI - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-eng-fin - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       35,982,562 sentences
+  - opus_ParaCrawl/v9 #                                  31,315,914 sentences
+  - opus_OpenSubtitles/v2018 #                           27,281,566 sentences
+  - opus_HPLT/v1.1 #                                     25,176,714 sentences
+  - opus_ELRC-EMEA/v1 #                                  13,287,447 sentences
+  - opus_ELRC-4239-NTEU_TierA/v1 #                       12,855,266 sentences
+  - opus_CCAligned/v1 #                                   9,699,433 sentences
+  - opus_DGT/v2019 #                                      5,079,631 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   5,059,694 sentences
+  - opus_TildeMODEL/v2018 #                               3,059,563 sentences
+  - opus_XLEnt/v1.2 #                                     2,630,648 sentences
+  - opus_EUbookshop/v2 #                                  2,039,833 sentences
+  - opus_Europarl/v8 #                                    1,969,624 sentences
+  - opus_EMEA/v3 #                                        1,083,857 sentences
+  - opus_ELRC-2708-EMEA/v1 #                                753,744 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              457,342 sentences
+  - opus_WikiMatrix/v1 #                                    375,724 sentences
+  - opus_ELITR-ECA/v1 #                                     375,254 sentences
+  - opus_ECB/v1 #                                           157,603 sentences
+  - opus_ELRC-www.turku.fi/v1 #                             141,917 sentences
+  - opus_ELRC-Finnish_Information/v1 #                      127,638 sentences
+  - opus_ELRC-www.visitestonia.com/v1 #                     124,120 sentences
+  - opus_KDE4/v2 #                                          108,073 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        107,841 sentences
+  - opus_QED/v2.0a #                                        102,273 sentences
+  - opus_infopankki/v1 #                                     84,645 sentences
+  - opus_Tatoeba/v2023-04-12 #                               81,684 sentences
+  - opus_GNOME/v1 #                                          62,184 sentences
+  - opus_bible-uedin/v1 #                                    62,026 sentences
+  - opus_ELRC-724-Hallituskausi_2007_2/v1 #                  53,844 sentences
+  - opus_ELRA-W0220/v1 #                                     53,843 sentences
+  - opus_ELRC-1769-valtioneuvosto.fi/v1 #                    49,317 sentences
+  - opus_ELRC-valtioneuvosto.fi/v1 #                         49,317 sentences
+  - opus_TED2020/v1 #                                        44,447 sentences
+  - opus_wikimedia/v20230407 #                               43,055 sentences
+  - opus_ELRC-735-www.norden.org/v1 #                        42,742 sentences
+  - opus_ELRC-www.norden.org/v1 #                            42,742 sentences
+  - opus_ELRC-1127-www.vtv.fi/v1 #                           42,724 sentences
+  - opus_ELRC-www.vtv.fi/v1 #                                42,724 sentences
+  - opus_WMT-News/v2019 #                                    36,741 sentences
+  - opus_ELRC-1771-vnk.fi/v1 #                               31,527 sentences
+  - opus_ELRC-vnk.fi/v1 #                                    31,527 sentences
+  - opus_ELRC-725-Hallituskausi_2011_2/v1 #                  31,476 sentences
+  - opus_PHP/v1 #                                            27,879 sentences
+  - opus_NeuLab-TedTalks/v1 #                                26,761 sentences
+  - opus_ELRC-3559-EUR_LEX_covid/v1 #                        21,742 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   21,742 sentences
+  - opus_ELRC-2036-www.vero.fi/v1 #                          21,285 sentences
+  - opus_ELRC-www.vero.fi/v1 #                               21,285 sentences
+  - opus_JRC-Acquis/v3.0 #                                   19,665 sentences
+  - opus_ELRC-2032-www.turku.fi/v1 #                         17,674 sentences
+  - opus_ELRC-2017-EUIPO_2017/v1 #                           16,802 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                16,802 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            14,964 sentences
+  - opus_ELRC-1128-www.visitestonia.com/v1 #                 14,616 sentences
+  - opus_ELRC-2869-EU_publications_medi/v1 #                 12,943 sentences
+  - opus_ELRC-EU_publications/v1 #                           12,943 sentences
+  - opus_ELRC-antibiotic/v1 #                                11,241 sentences
+  - opus_EUconst/v1 #                                        10,026 sentences
+  - opus_ELRC-716-Finnish_Information_/v1 #                   9,942 sentences
+  - opus_ELRA-W0217/v1 #                                      9,941 sentences
+  - opus_ELRC-3600-presscorner_covid/v1 #                     6,760 sentences
+  - opus_Books/v1 #                                           3,645 sentences
+  - opus_ELRC_3382/v1 #                                       3,358 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,618 sentences
+  - opus_ELRC-3458-EC_EUROPA_covid/v1 #                       2,600 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,600 sentences
+  - opus_ELRC-Swedish_Labour/v1 #                             1,780 sentences
+  - opus_ELRC-4995-Finnish_Financial_MT/v1 #                  1,001 sentences
+  - opus_ELRC-3196-antibiotic/v1 #                              885 sentences
+  - opus_ELRC-416-Swedish_Social_Secur/v1 #                     843 sentences
+  # - opus_ELRC-416-Swedish_Social_Secur/v1 (842 sentences) -- disabled: exact duplicate of the entry above. NOTE(review): a different dataset name may have been intended; the training-data total quoted before the skipped-datasets list still counts these 842 sentences.
+  - opus_ELRC-3287-EUROPARL_covid/v1 #                          696 sentences
+  - opus_ELRC-2739-vaccination/v1 #                             471 sentences
+  - opus_ELRC-vaccination/v1 #                                  471 sentences
+  - opus_ELRC_2923/v1 #                                         396 sentences
+  - opus_ELRC-3045-wikipedia_health/v1 #                        334 sentences
+  - opus_ELRC-wikipedia_health/v1 #                             334 sentences
+  - opus_ELRC_2922/v1 #                                         333 sentences
+  - mtdata_ELRC-swedish_labour_part2-1-eng-fin
+  - mtdata_ELRC-swedish_labour_part1-1-eng-fin
+  - mtdata_ELRC-swedish_social_security-1-eng-fin
+  - mtdata_ELRC-swedish_food-1-eng-fin
+  - mtdata_ELRC-finnish_information_bank-1-eng-fin
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-fin
+  - mtdata_ELRC-nteu_tierb-1-eng-fin
+  - mtdata_EU-eac_forms-1-eng-fin #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-fin #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-fin #                          ~1,039,474 sentences (117.5 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-fin #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Statmt-wiki_titles-1-fin-eng #                  ~45,145 sentences (5.1 MB)
+  - mtdata_Statmt-newsdev_fien-2015-fin-eng #             ~402,756 sentences (45.5 MB)
+  - mtdata_Statmt-newsdev_enfi-2015-eng-fin #             ~402,756 sentences (45.5 MB)
+  - mtdata_Tilde-eesc-2017-eng-fin #                    ~1,759,784 sentences (198.9 MB)
+  - mtdata_Tilde-ema-2016-eng-fin #                       ~222,060 sentences (25.1 MB)
+  - mtdata_Tilde-airbaltic-1-eng-fin #                        ~754 sentences (85.2 kB)
+  - mtdata_Tilde-rapid-2016-eng-fin #                     ~365,302 sentences (41.3 MB)
+
+  # The source-language (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-language (fi) monolingual data contains:
+  #   ~11,592,916 sentences
+  mono-trg:
+  - news-crawl_news.2015  #          ~1,601,769 sentences (181M)
+  - news-crawl_news.2016 #          ~1,336,283 sentences (151M)
+  - news-crawl_news.2017 #          ~1,265,486 sentences (143M)
+  - news-crawl_news.2018 #          ~1,035,398 sentences (117M)
+  - news-crawl_news.2019 #          ~1,672,566 sentences (189M)
+  - news-crawl_news.2020 #          ~1,407,079 sentences (159M)
+  - news-crawl_news.2021 #          ~1,106,194 sentences (125M)
+  - news-crawl_news.2022 #          ~1,070,796 sentences (121M)
+  - news-crawl_news.2023 #          ~1,097,345 sentences (124M)
+marian-args:  # numeric option values below are quoted, so they load as strings
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # the backward stage uses 5; every other training stage below uses 20
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # unquoted, so this loads as a YAML boolean
+taskcluster:
+  split-chunks: 20  # NOTE(review): presumably the number of chunks the data is split into - confirm
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-hr-spring-2024.yml b/configs/en-hr-spring-2024.yml
new file mode 100644
index 000000000..a6f29917d
--- /dev/null
+++ b/configs/en-hr-spring-2024.yml
@@ -0,0 +1,225 @@
+# The initial configuration was generated using:
+# task config-generator -- en hr --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024  # same value as the --name argument of the generator command above
+  src: en  # source language; first positional argument of the generator command
+  trg: hr  # target language; second positional argument of the generator command
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted, so this loads as the string 'true', not a boolean
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping: no per-dataset thresholds are set
+  mono-max-sentences-src: 500_000_000  # underscores are digit grouping; YAML 1.1 loaders read the integer 500000000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2  # NOTE(review): presumably the number of teacher models trained - confirm
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-hrv
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   99,724,833 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (18,797,643 sentences)
+  #  - opus_MultiHPLT/v1.1 - ignored datasets (9,310,276 sentences)
+  #  - opus_MultiMaCoCu/v2 - ignored datasets (2,266,005 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-croatian_bank-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-croatian_mine_action-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-agriculture-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-eng-hrv - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-hrv - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-hrv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-hrv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-hrv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-hrv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-hrv - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-hrv_HR - duplicate with opus
+  train:
+  - opus_OpenSubtitles/v2018  #                           35,131,729 sentences
+  - opus_NLLB/v1 #                                       18,797,643 sentences
+  - opus_ELRC-EMEA/v1 #                                  10,890,456 sentences
+  - opus_CCAligned/v1 #                                   9,376,190 sentences
+  - opus_HPLT/v1.1 #                                      9,310,369 sentences
+  - opus_ParaCrawl/v9 #                                   3,240,485 sentences
+  - opus_XLEnt/v1.2 #                                     2,844,710 sentences
+  - opus_ELRC-4142-NTEU_TierA/v1 #                        2,290,893 sentences
+  - opus_MaCoCu/v2 #                                      2,266,007 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              806,581 sentences
+  - opus_TildeMODEL/v2018 #                                 745,616 sentences
+  - opus_DGT/v2019 #                                        722,182 sentences
+  - opus_ELRC-2706-EMEA/v1 #                                650,030 sentences
+  - opus_WikiMatrix/v1 #                                    259,499 sentences
+  - opus_QED/v2.0a #                                        208,129 sentences
+  - opus_SETIMES/v2 #                                       205,910 sentences
+  - opus_TED2020/v1 #                                       197,411 sentences
+  - opus_ELITR-ECA/v1 #                                     181,038 sentences
+  - opus_EuroPat/v3 #                                       154,775 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        140,795 sentences
+  - opus_ELRC-Regional_Development/v1 #                     136,809 sentences
+  - opus_NeuLab-TedTalks/v1 #                               128,233 sentences
+  - opus_ELRC-Rural_Development/v1 #                        105,562 sentences
+  - opus_hrenWaC/v1 #                                        99,001 sentences
+  - opus_KDE4/v2 #                                           87,333 sentences
+  - opus_TedTalks/v1 #                                       86,348 sentences
+  - opus_ELRC-2542-Agriculture/v1 #                          68,376 sentences
+  - opus_bible-uedin/v1 #                                    62,179 sentences
+  - opus_ELRC-4329-PRINCIPLE_MVEP_legal/v1 #                 44,460 sentences
+  - opus_wikimedia/v20230407 #                               42,034 sentences
+  - opus_GNOME/v1 #                                          35,429 sentences
+  - opus_ELRC-3556-EUR_LEX_covid/v1 #                        22,010 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,010 sentences
+  - opus_ELRC-651-government_websites_/v1 #                  21,341 sentences
+  - opus_ELRC-government_websites/v1 #                       21,341 sentences
+  - opus_ELRA-W0204/v1 #                                     21,340 sentences
+  - opus_ELRC-943-Journal_Croatian_Ass/v1 #                  18,478 sentences
+  - opus_ELRA-W0273/v1 #                                     18,477 sentences
+  - opus_ELRC-1015-Croatian_Mine_Action/v1 #                 17,602 sentences
+  - opus_ELRA-W0131/v1 #                                     17,601 sentences
+  - opus_ELRC-1174-EUIPO_2017/v1 #                           17,205 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                17,205 sentences
+  - opus_ELRC-2866-EU_publications_medi/v1 #                 12,837 sentences
+  - opus_ELRC-EU_publications/v1 #                           12,837 sentences
+  - opus_ELRC-921-studies_challenges_C/v1 #                  11,781 sentences
+  - opus_ELRA-W0266/v1 #                                     11,780 sentences
+  - opus_ELRC-915-statistical_reports_/v1 #                  11,738 sentences
+  - opus_ELRC-statistical_reports/v1 #                       11,738 sentences
+  - opus_ELRA-W0264/v1 #                                     11,737 sentences
+  - opus_ELRC-788-Croatian_Bank/v1 #                         11,708 sentences
+  - opus_ELRA-W0226/v1 #                                     11,707 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            10,175 sentences
+  - opus_ELRC-2541-Regional_Development/v1 #                  7,911 sentences
+  - opus_ELRC-3597-presscorner_covid/v1 #                     6,645 sentences
+  - opus_EUbookshop/v2 #                                      6,104 sentences
+  - opus_ELRC-992-Rural_Development_Pr/v1 #                   5,202 sentences
+  - opus_ELRC_3382/v1 #                                       3,671 sentences
+  - opus_ELRC-989-Foreign_Affairs_Croa/v1 #                   3,103 sentences
+  - opus_ELRC-Foreign_Affairs/v1 #                            3,103 sentences
+  - opus_ELRA-W0293/v1 #                                      3,102 sentences
+  - opus_ELRC-3478-EC_EUROPA_covid/v1 #                       2,595 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,595 sentences
+  - opus_Tatoeba/v2023-04-12 #                                2,454 sentences
+  - opus_ELRC-991-Croatian_Journal_Fis/v1 #                   2,408 sentences
+  - opus_ELRA-W0294/v1 #                                      2,407 sentences
+  - opus_ELRC-1080-Acts_Biological_Land/v1 #                  2,329 sentences
+  - opus_ELRA-W0142/v1 #                                      2,328 sentences
+  - opus_ELRC-1058-University_Library_Z/v1 #                  2,310 sentences
+  - opus_ELRA-W0135/v1 #                                      2,309 sentences
+  - opus_ELRC-986-Embassy_Finland_Zagr/v1 #                   1,967 sentences
+  - opus_ELRA-W0292/v1 #                                      1,966 sentences
+  - opus_ELRC-1159-Swedish_Migration_Bo/v1 #                  1,112 sentences
+  - opus_ELRC-Swedish_Migration/v1 #                          1,112 sentences
+  - opus_ELRC-3193-antibiotic/v1 #                            1,070 sentences
+  - opus_ELRC-antibiotic/v1 #                                 1,070 sentences
+  - opus_ELRC-984-Government_Cooperati/v1 #                   1,026 sentences
+  - opus_ELRA-W0291/v1 #                                      1,025 sentences
+  - opus_ELRC-996-nature_protection_st/v1 #                     970 sentences
+  - opus_ELRC-825-Croatian_Swedish_Cri/v1 #                     907 sentences
+  - opus_ELRA-W0238/v1 #                                        906 sentences
+  - opus_ELRC-2753-vaccination/v1 #                             509 sentences
+  - opus_ELRC-vaccination/v1 #                                  509 sentences
+  - opus_ELRC_2922/v1 #                                         485 sentences
+  - opus_ELRC-3284-EUROPARL_covid/v1 #                          475 sentences
+  - opus_ELRC_2923/v1 #                                         288 sentences
+  - mtdata_ELRC-government_websites_croatian-1-eng-hrv
+  - mtdata_ELRC-croatian_swedish_crime_victim_compensation_support_authority-1-eng-hrv
+  - mtdata_ELRC-statistical_reports_studies_croatian_bureau_statistics-1-eng-hrv
+  - mtdata_ELRC-studies_challenges_croatian_accession_union_croatian_institute_finance-1-eng-hrv
+  - mtdata_ELRC-journal_croatian_association_civil_engineers-1-eng-hrv
+  - mtdata_ELRC-government_cooperation_ngos-1-eng-hrv
+  - mtdata_ELRC-embassy_finland_zagreb-1-eng-hrv
+  - mtdata_ELRC-foreign_affairs_croatia-1-eng-hrv
+  - mtdata_ELRC-croatian_journal_fisheries-1-eng-hrv
+  - mtdata_ELRC-rural_development_programme_period_2014_2020_croatian_rural_development_programme-1-eng-hrv
+  - mtdata_ELRC-nature_protection_strategy_croatia-1-eng-hrv
+  - mtdata_ELRC-university_library_zagreb-1-eng-hrv
+  - mtdata_ELRC-acts_biological_landscape_diversity_environmental_protection-1-eng-hrv
+  - mtdata_ELRC-swedish_migration_board_migrationsverket-1-eng-hrv
+  - mtdata_ELRC-regional_development_funds-1-eng-hrv
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-hrv
+  - mtdata_ELRC-wikipedia_health-1-eng-hrv
+  - mtdata_ELRC-nteu_tierb-1-eng-hrv
+  - mtdata_EU-eac_reference-1-eng-hrv #                    ~31,162 sentences (3.5 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-hrv #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Tilde-eesc-2017-eng-hrv #                      ~216,663 sentences (24.5 MB)
+  - mtdata_Tilde-ema-2016-eng-hrv #                       ~209,283 sentences (23.6 MB)
+  - mtdata_Tilde-ecb-2017-eng-hrv #                           ~876 sentences (99.0 kB)
+  - mtdata_Tilde-rapid-2016-eng-hrv #                      ~45,055 sentences (5.1 MB)
+  - mtdata_Tilde-worldbank-1-eng-hrv #                      ~1,566 sentences (177.0 kB)
+
+  # The source-language (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-language (hr) monolingual data contains:
+  #   ~11,498,228 sentences
+  mono-trg:
+  - news-crawl_news.2014  #             ~46,902 sentences (5.3M)
+  - news-crawl_news.2019 #          ~1,398,230 sentences (158M)
+  - news-crawl_news.2020 #          ~2,610,619 sentences (295M)
+  - news-crawl_news.2021 #          ~2,398,230 sentences (271M)
+  - news-crawl_news.2022 #          ~2,592,920 sentences (293M)
+  - news-crawl_news.2023 #          ~2,451,327 sentences (277M)
+marian-args:  # numeric option values below are quoted, so they load as strings
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # the backward stage uses 5; every other training stage below uses 20
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # unquoted, so this loads as a YAML boolean
+taskcluster:
+  split-chunks: 20  # NOTE(review): presumably the number of chunks the data is split into - confirm
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-hu-spring-2024.yml b/configs/en-hu-spring-2024.yml
new file mode 100644
index 000000000..4392942d7
--- /dev/null
+++ b/configs/en-hu-spring-2024.yml
@@ -0,0 +1,190 @@
+# The initial configuration was generated using:
+# task config-generator -- en hu --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # Settings for the en -> hu run named spring-2024 (see the generator command in the file header).
+  name: spring-2024
+  src: en
+  trg: hu
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted: loads as the string "true", not a YAML boolean (wandb-publication below is a real boolean) — TODO confirm the consumer expects a string
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping; presumably per-dataset overrides of default-threshold would go here — confirm
+  mono-max-sentences-src: 500_000_000  # "_" digit separators: an int for YAML 1.1 loaders such as PyYAML, but a string under the YAML 1.2 core schema
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:
+  devtest:
+  - mtdata_Lindat-khresmoi_summary_dev-2-eng-hun
+  - mtdata_Neulab-tedtalks_dev-1-eng-hun
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt08
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt09
+
+  # The training data contains:
+  #   147,465,213 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (36,435,409 sentences)
+  #  - opus_ELRC-EMEA/v1 - not enough data  (0 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - opus_WMT-News/v2019 - WMT news test sets; evaluation here uses sacrebleu wmt08/wmt09 (3,027 sentences)
+  #  - mtdata_ELRC-euipo_2017-1-eng-hun - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-hun - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-hun - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-eng-hun - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-hun - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-hun - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-hun - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-hun - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-hun - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-hun - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-hun - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-eng-hun - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-hun - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-hun - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-hun - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-hun - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-hun - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-hun-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-hun_HU - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-eng-hun - duplicate with opus
+  train:  # Parallel training corpora; the header total above is the sum of the exact OPUS counts listed here.
+  - opus_OpenSubtitles/v2018  #                           42,655,519 sentences
+  - opus_NLLB/v1 #                                       36,435,409 sentences
+  - opus_ParaCrawl/v9 #                                  36,433,273 sentences
+  - opus_CCAligned/v1 #                                  11,586,886 sentences
+  - opus_DGT/v2019 #                                      5,074,777 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   3,716,457 sentences
+  - opus_XLEnt/v1.2 #                                     3,609,156 sentences
+  - opus_TildeMODEL/v2018 #                               1,958,734 sentences
+  - opus_EMEA/v3 #                                        1,050,606 sentences
+  - opus_ELRC-2715-EMEA/v1 #                                772,359 sentences
+  - opus_Europarl/v8 #                                      625,178 sentences
+  - opus_WikiMatrix/v1 #                                    488,319 sentences
+  - opus_JRC-Acquis/v3.0 #                                  487,829 sentences
+  - opus_EUbookshop/v2 #                                    438,264 sentences
+  - opus_QED/v2.0a #                                        335,038 sentences
+  - opus_TED2020/v1 #                                       308,341 sentences
+  - opus_ELITR-ECA/v1 #                                     299,216 sentences
+  - opus_NeuLab-TedTalks/v1 #                               159,437 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        139,284 sentences
+  - opus_Books/v1 #                                         137,151 sentences
+  - opus_KDE4/v2 #                                          120,657 sentences
+  - opus_Tatoeba/v2023-04-12 #                              116,774 sentences
+  - opus_wikimedia/v20230407 #                               91,028 sentences
+  - opus_ECB/v1 #                                            72,034 sentences
+  - opus_bible-uedin/v1 #                                    62,121 sentences
+  - opus_Wikipedia/v1.0 #                                    61,472 sentences
+  - opus_PHP/v1 #                                            35,423 sentences
+  - opus_ELRC-5067-SciPar/v1 #                               27,422 sentences
+  - opus_ELRC-3566-EUR_LEX_covid/v1 #                        22,271 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,271 sentences
+  - opus_ELRC-2019-EUIPO_2017/v1 #                           17,038 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                17,038 sentences
+  - opus_GlobalVoices/v2018q4 #                              15,362 sentences
+  - opus_ELRC-2876-EU_publications_medi/v1 #                 13,026 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,026 sentences
+  - opus_ELRC-antibiotic/v1 #                                10,272 sentences
+  - opus_EUconst/v1 #                                         8,748 sentences
+  - opus_ELRC-3607-presscorner_covid/v1 #                     6,599 sentences
+  - opus_GNOME/v1 #                                           6,312 sentences
+  - opus_ELRC_3382/v1 #                                       3,564 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,572 sentences
+  - opus_ELRC-3465-EC_EUROPA_covid/v1 #                       2,497 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,497 sentences
+  - opus_ELRC-3203-antibiotic/v1 #                              687 sentences
+  - opus_ELRC-2744-vaccination/v1 #                             518 sentences
+  - opus_ELRC-vaccination/v1 #                                  518 sentences
+  - opus_ELRC-3294-EUROPARL_covid/v1 #                          410 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                               410 sentences
+  - opus_ELRC-3064-wikipedia_health/v1 #                        401 sentences
+  - opus_ELRC-wikipedia_health/v1 #                             401 sentences
+  - opus_ELRC_2922/v1 #                                         400 sentences
+  - opus_ELRC_2923/v1 #                                         211 sentences
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-hun
+  - mtdata_EU-eac_forms-1-eng-hun #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-hun #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-hun #                            ~546,644 sentences (61.8 MB)
+  - mtdata_Lindat-khresmoi_summary_test-2-eng-hun #        ~11,808 sentences (1.3 MB); NOTE(review): held-out test split listed under train — confirm intended
+  - mtdata_Neulab-tedtalks_test-1-eng-hun #             ~3,117,009 sentences (352.2 MB); NOTE(review): held-out test split listed under train — confirm intended
+  - mtdata_Tilde-eesc-2017-eng-hun #                    ~1,098,560 sentences (124.1 MB)
+  - mtdata_Tilde-ema-2016-eng-hun #                       ~237,326 sentences (26.8 MB)
+  - mtdata_Tilde-rapid-2016-eng-hun #                     ~219,863 sentences (24.8 MB)
+
+  # The source-side (mono-src, en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-side (mono-trg, hu) monolingual data contains:
+  #   ~30,008,843 sentences
+  mono-trg:
+  - news-crawl_news.2007  #             ~26,548 sentences (3.0M)
+  - news-crawl_news.2008 #          ~1,150,442 sentences (130M)
+  - news-crawl_news.2009 #          ~1,070,796 sentences (121M)
+  - news-crawl_news.2011 #          ~2,743,362 sentences (310M)
+  - news-crawl_news.2012 #          ~2,495,575 sentences (282M)
+  - news-crawl_news.2013 #          ~2,318,584 sentences (262M)
+  - news-crawl_news.2014 #          ~1,876,106 sentences (212M)
+  - news-crawl_news.2015 #          ~1,805,309 sentences (204M)
+  - news-crawl_news.2016 #          ~1,752,212 sentences (198M)
+  - news-crawl_news.2017 #          ~2,061,946 sentences (233M)
+  - news-crawl_news.2018 #          ~1,814,159 sentences (205M)
+  - news-crawl_news.2019 #          ~2,176,991 sentences (246M)
+  - news-crawl_news.2020 #          ~2,238,938 sentences (253M)
+  - news-crawl_news.2021 #          ~1,831,858 sentences (207M)
+  - news-crawl_news.2022 #          ~2,274,336 sentences (257M)
+  - news-crawl_news.2023 #          ~2,371,681 sentences (268M)
+marian-args:  # Per-stage argument overrides; presumably forwarded to Marian — TODO confirm against the docs linked in the file header.
+  decoding-backward:
+    beam-size: '12'  # quoted, so this loads as the string "12", not an int
+    mini-batch-words: '2000'  # quoted string as well
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16  # plain scalar that is not number-like, so it also loads as a string
+  training-backward:
+    early-stopping: '5'  # the only early-stopping value that differs; the other three stages use '20'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # unquoted, so a real YAML boolean — unlike the quoted 'true' of experiment.use-opuscleaner
+taskcluster:  # NOTE(review): presumably Taskcluster execution settings — confirm against the docs linked in the file header
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-id-spring-2024.yml b/configs/en-id-spring-2024.yml
new file mode 100644
index 000000000..3b743d12c
--- /dev/null
+++ b/configs/en-id-spring-2024.yml
@@ -0,0 +1,117 @@
+# The initial configuration was generated using:
+# task config-generator -- en id --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # Settings for the en -> id run named spring-2024 (see the generator command in the file header).
+  name: spring-2024
+  src: en
+  trg: id
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted: loads as the string "true", not a YAML boolean (wandb-publication below is a real boolean) — TODO confirm the consumer expects a string
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping; presumably per-dataset overrides of default-threshold would go here — confirm
+  mono-max-sentences-src: 500_000_000  # "_" digit separators: an int for YAML 1.1 loaders such as PyYAML, but a string under the YAML 1.2 core schema
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-ind
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   102,103,778 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (70,545,705 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-eng-ind - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-ind - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-ind - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-14-eng-ind - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-15-eng-ind - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-16-eng-ind - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-ind_ID - duplicate with opus
+  train:  # Parallel training corpora; the OPUS counts below sum exactly to the 102,103,778 total in the header comment.
+  - opus_NLLB/v1  #                                       70,545,705 sentences
+  - opus_CCAligned/v1 #                                  15,700,345 sentences
+  - opus_OpenSubtitles/v2018 #                            9,268,181 sentences
+  - opus_XLEnt/v1.2 #                                     4,179,174 sentences
+  - opus_WikiMatrix/v1 #                                  1,019,171 sentences
+  - opus_Tanzil/v1 #                                        393,552 sentences
+  - opus_wikimedia/v20230407 #                              284,126 sentences
+  - opus_QED/v2.0a #                                        274,581 sentences
+  - opus_TED2020/v1 #                                       165,059 sentences
+  - opus_NeuLab-TedTalks/v1 #                                95,295 sentences
+  - opus_bible-uedin/v1 #                                    59,363 sentences
+  - opus_GNOME/v1 #                                          47,234 sentences
+  - opus_News-Commentary/v16 #                               18,054 sentences
+  - opus_GlobalVoices/v2018q4 #                              16,043 sentences
+  - opus_KDE4/v2 #                                           14,782 sentences
+  - opus_Tatoeba/v2023-04-12 #                               10,550 sentences
+  - opus_tico-19/v2020-10-28 #                                3,071 sentences
+  - opus_ELRC-3049-wikipedia_health/v1 #                      2,680 sentences
+  - opus_ELRC-wikipedia_health/v1 #                           2,680 sentences
+  - opus_ELRC_2922/v1 #                                       2,679 sentences
+  - opus_tldr-pages/v2023-08-29 #                             1,453 sentences
+  - mtdata_Neulab-tedtalks_test-1-eng-ind #             ~3,117,009 sentences (352.2 MB); NOTE(review): held-out test split listed under train, and "~" is presumably a size-based estimate — confirm intended
+
+  # The source-side (mono-src, en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-side (mono-trg, id) monolingual data contains:
+  #   ~0 sentences
+  mono-trg: []
+marian-args:  # Per-stage argument overrides; presumably forwarded to Marian — TODO confirm against the docs linked in the file header.
+  decoding-backward:
+    beam-size: '12'  # quoted, so this loads as the string "12", not an int
+    mini-batch-words: '2000'  # quoted string as well
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16  # plain scalar that is not number-like, so it also loads as a string
+  training-backward:
+    early-stopping: '5'  # the only early-stopping value that differs; the other three stages use '20'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # unquoted, so a real YAML boolean — unlike the quoted 'true' of experiment.use-opuscleaner
+taskcluster:  # NOTE(review): presumably Taskcluster execution settings — confirm against the docs linked in the file header
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-lt-spring-2024.yml b/configs/en-lt-spring-2024.yml
new file mode 100644
index 000000000..2a88aaa00
--- /dev/null
+++ b/configs/en-lt-spring-2024.yml
@@ -0,0 +1,192 @@
+# The initial configuration was generated using:
+# task config-generator -- en lt --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # Settings for the en -> lt run named spring-2024 (see the generator command in the file header).
+  name: spring-2024
+  src: en
+  trg: lt
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted: loads as the string "true", not a YAML boolean (wandb-publication below is a real boolean) — TODO confirm the consumer expects a string
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping; presumably per-dataset overrides of default-threshold would go here — confirm
+  mono-max-sentences-src: 500_000_000  # "_" digit separators: an int for YAML 1.1 loaders such as PyYAML, but a string under the YAML 1.2 core schema
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-lit
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt19/dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt19
+
+  # The training data contains:
+  #   76,637,902 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (23,298,470 sentences)
+  #  - opus_ELRC-3069-wikipedia_health/v1 - not enough data  (136 sentences)
+  #  - opus_ELRC-wikipedia_health/v1 - not enough data  (136 sentences)
+  #  - opus_ELRC_2922/v1 - not enough data  (135 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - opus_WMT-News/v2019 - WMT news test sets; evaluation here uses sacrebleu wmt19 and wmt19/dev (5,998 sentences)
+  #  - mtdata_ELRC-president_lithuania-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-www.lrs.lt-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-www.lb.lt-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-kam.lt-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-eng-lit - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-lit - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-lit - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-lit - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-3-eng-lit - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-lit - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-lit - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-lit - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-lit - duplicate with opus
+  #  - mtdata_Statmt-europarl-9-lit-eng - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-lit-eng - duplicate with opus
+  #  - mtdata_Statmt-europarl-10-lit-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-lit_LT - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-eng-lit - duplicate with opus
+  #  - mtdata_Statmt-newsdev_enlt-2019-eng-lit - WMT19 dev set; devtest validates on sacrebleu wmt19/dev
+  #  - mtdata_Statmt-newsdev_lten-2019-lit-eng - WMT19 dev set (reverse direction); devtest validates on sacrebleu wmt19/dev
+  train:  # Parallel training corpora; the header total above is the sum of the exact OPUS counts listed here.
+  - opus_NLLB/v1  #                                       23,298,470 sentences
+  - opus_ParaCrawl/v9 #                                  13,192,237 sentences
+  - opus_ELRC-EMEA/v1 #                                  11,487,359 sentences
+  - opus_ELRC-4270-NTEU_TierA/v1 #                        8,061,918 sentences
+  - opus_CCAligned/v1 #                                   5,215,271 sentences
+  - opus_DGT/v2019 #                                      5,061,918 sentences
+  - opus_TildeMODEL/v2018 #                               2,084,002 sentences
+  - opus_XLEnt/v1.2 #                                     1,642,943 sentences
+  - opus_OpenSubtitles/v2018 #                            1,415,961 sentences
+  - opus_EMEA/v3 #                                        1,042,425 sentences
+  - opus_JRC-Acquis/v3.0 #                                  790,475 sentences
+  - opus_ELRC-2717-EMEA/v1 #                                764,031 sentences
+  - opus_Europarl/v8 #                                      634,284 sentences
+  - opus_EUbookshop/v2 #                                    445,813 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              177,437 sentences
+  - opus_WikiMatrix/v1 #                                    157,526 sentences
+  - opus_ELITR-ECA/v1 #                                     147,678 sentences
+  - opus_ELRC-425-Lithuanian_legislati/v1 #                 130,549 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        117,054 sentences
+  - opus_KDE4/v2 #                                          104,044 sentences
+  - opus_QED/v2.0a #                                         85,435 sentences
+  - opus_TED2020/v1 #                                        75,484 sentences
+  - opus_ECB/v1 #                                            69,805 sentences
+  - opus_bible-uedin/v1 #                                    62,187 sentences
+  - opus_GNOME/v1 #                                          59,776 sentences
+  - opus_NeuLab-TedTalks/v1 #                                45,963 sentences
+  - opus_ELRC-591-www.lb.lt/v1 #                             33,261 sentences
+  - opus_ELRC-3568-EUR_LEX_covid/v1 #                        21,390 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   21,390 sentences
+  - opus_ELRC-405-President_Lithuania/v1 #                   21,225 sentences
+  - opus_ELRA-W0160/v1 #                                     21,224 sentences
+  - opus_ELRC-2021-EUIPO_2017/v1 #                           17,133 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                17,133 sentences
+  - opus_wikimedia/v20230407 #                               14,454 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            13,851 sentences
+  - opus_ELRC-antibiotic/v1 #                                12,602 sentences
+  - opus_ELRC-2878-EU_publications_medi/v1 #                 12,581 sentences
+  - opus_ELRC-EU_publications/v1 #                           12,581 sentences
+  - opus_EUconst/v1 #                                        10,171 sentences
+  - opus_ELRC-592-kam.lt/v1 #                                 8,531 sentences
+  - opus_Tatoeba/v2023-04-12 #                                8,236 sentences
+  - opus_ELRC-3609-presscorner_covid/v1 #                     6,462 sentences
+  - opus_ELRC_3382/v1 #                                       3,587 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,546 sentences
+  - opus_ELRC-3467-EC_EUROPA_covid/v1 #                       2,438 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,438 sentences
+  - opus_ELRC-590-www.lrs.lt/v1 #                             1,771 sentences
+  - opus_ELRC-3205-antibiotic/v1 #                              823 sentences
+  - opus_ELRC-3296-EUROPARL_covid/v1 #                          553 sentences
+  - opus_ELRC-2740-vaccination/v1 #                             546 sentences
+  - opus_ELRC-vaccination/v1 #                                  546 sentences
+  - opus_ELRC_2923/v1 #                                         384 sentences
+  - mtdata_ELRC-lithuanian_legislation_seimas_lithuania-1-eng-lit
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-lit
+  - mtdata_ELRC-wikipedia_health-1-eng-lit
+  - mtdata_ELRC-nteu_tierb-1-eng-lit
+  - mtdata_EU-eac_forms-1-eng-lit #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-lit #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-lit #                            ~510,025 sentences (57.6 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-lit #             ~3,117,009 sentences (352.2 MB); NOTE(review): held-out test split listed under train — confirm intended
+  - mtdata_Statmt-wiki_titles-1-lit-eng #                  ~15,267 sentences (1.7 MB)
+  - mtdata_Tilde-eesc-2017-eng-lit #                    ~1,149,015 sentences (129.8 MB)
+  - mtdata_Tilde-ema-2016-eng-lit #                       ~228,287 sentences (25.8 MB)
+  - mtdata_Tilde-airbaltic-1-eng-lit #                        ~962 sentences (108.7 kB)
+  - mtdata_Tilde-rapid-2016-eng-lit #                     ~180,798 sentences (20.4 MB)
+
+  # The source-side (mono-src, en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-side (mono-trg, lt) monolingual data contains:
+  #   ~5,442,476 sentences
+  mono-trg:
+  - news-crawl_news.2019  #          ~1,079,646 sentences (122M)
+  - news-crawl_news.2020 #          ~1,088,495 sentences (123M)
+  - news-crawl_news.2021 #          ~1,008,849 sentences (114M)
+  - news-crawl_news.2022 #          ~1,079,646 sentences (122M)
+  - news-crawl_news.2023 #          ~1,185,840 sentences (134M)
+marian-args:  # Per-stage argument overrides; presumably forwarded to Marian — TODO confirm against the docs linked in the file header.
+  decoding-backward:
+    beam-size: '12'  # quoted, so this loads as the string "12", not an int
+    mini-batch-words: '2000'  # quoted string as well
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16  # plain scalar that is not number-like, so it also loads as a string
+  training-backward:
+    early-stopping: '5'  # the only early-stopping value that differs; the other three stages use '20'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # unquoted, so a real YAML boolean — unlike the quoted 'true' of experiment.use-opuscleaner
+taskcluster:  # NOTE(review): presumably Taskcluster execution settings — confirm against the docs linked in the file header
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-lv-spring-2024.yml b/configs/en-lv-spring-2024.yml
new file mode 100644
index 000000000..6585c4ba8
--- /dev/null
+++ b/configs/en-lv-spring-2024.yml
@@ -0,0 +1,194 @@
+# The initial configuration was generated using:
+# task config-generator -- en lv --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: en
+  trg: lv
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt17/dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt17
+
+  # The training data contains:
+  #   68,374,368 sentences (sum of the opus_* entries below; mtdata estimates are not counted)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (16,685,969 sentences)
+  #  - opus_ELRC-3089-wikipedia_health/v1 - not enough data  (143 sentences)
+  #  - opus_ELRC-wikipedia_health/v1 - not enough data  (143 sentences)
+  #  - opus_ELRC_2922/v1 - not enough data  (142 sentences)
+  #  - opus_ELRA-W0308/v1 - not enough data  (108 sentences)
+  #  - opus_ELRC-648-Letter_rights_person/v1 - not enough data  (84 sentences)
+  #  - opus_ELRC-403-Rights_Arrested/v1 - not enough data  (23 sentences)
+  #  - opus_ELRA-W0301/v1 - not enough data  (20 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-mfa_latvia-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-state_latvian-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-www.visitestonia.com-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-covid19.gov.lv-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-eng-lav - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-lav - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-lav - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-lav - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-lav - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-lav - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-lav-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-lav_LV - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-eng-lav - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       16,685,969 sentences
+  - opus_ParaCrawl/v9 #                                  13,064,066 sentences
+  - opus_ELRC-EMEA/v1 #                                  11,795,507 sentences
+  - opus_ELRC-4269-NTEU_TierA/v1 #                        8,072,484 sentences
+  - opus_DGT/v2019 #                                      5,072,124 sentences
+  - opus_CCAligned/v1 #                                   4,850,972 sentences
+  - opus_TildeMODEL/v2018 #                               2,111,785 sentences
+  - opus_XLEnt/v1.2 #                                     1,295,887 sentences
+  - opus_EMEA/v3 #                                        1,030,272 sentences
+  - opus_JRC-Acquis/v3.0 #                                  793,589 sentences
+  - opus_ELRC-2729-EMEA/v1 #                                783,490 sentences
+  - opus_Europarl/v8 #                                      639,318 sentences
+  - opus_OpenSubtitles/v2018 #                              519,553 sentences
+  - opus_EUbookshop/v2 #                                    445,891 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              347,473 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        128,895 sentences
+  - opus_KDE4/v2 #                                           91,386 sentences
+  - opus_QED/v2.0a #                                         72,447 sentences
+  - opus_ECB/v1 #                                            65,374 sentences
+  - opus_ELITR-ECA/v1 #                                      64,115 sentences
+  - opus_TED2020/v1 #                                        55,488 sentences
+  - opus_ELRC-399-International_Agreem/v1 #                  40,897 sentences
+  - opus_ELRA-W0158/v1 #                                     40,896 sentences
+  - opus_ELRC-3578-EUR_LEX_covid/v1 #                        22,476 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,476 sentences
+  - opus_wikimedia/v20230407 #                               21,295 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            17,831 sentences
+  - opus_ELRC-2022-EUIPO_2017/v1 #                           17,255 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                17,255 sentences
+  - opus_bible-uedin/v1 #                                    15,885 sentences
+  - opus_ELRC-1130-www.visitestonia.com/v1 #                 13,841 sentences
+  - opus_ELRC-www.visitestonia.com/v1 #                      13,841 sentences
+  - opus_ELRC-2888-EU_publications_medi/v1 #                 13,045 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,045 sentences
+  - opus_ELRC-antibiotic/v1 #                                12,048 sentences
+  - opus_ELRC-715-Finance_Economics_Ba/v1 #                  11,600 sentences
+  - opus_ELRA-W0216/v1 #                                     11,599 sentences
+  - opus_GNOME/v1 #                                          11,265 sentences
+  - opus_EUconst/v1 #                                        10,036 sentences
+  - opus_WMT-News/v2019 #                                     8,008 sentences
+  - opus_ELRC-402-MFA_Latvia/v1 #                             7,195 sentences
+  - opus_ELRA-W0159/v1 #                                      7,194 sentences
+  - opus_ELRC-433-State_Latvian/v1 #                          6,862 sentences
+  - opus_ELRA-W0169/v1 #                                      6,861 sentences
+  - opus_ELRC-3619-presscorner_covid/v1 #                     6,686 sentences
+  - opus_ELRC_3382/v1 #                                       3,737 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,543 sentences
+  - opus_ELRC-3477-EC_EUROPA_covid/v1 #                       2,407 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,407 sentences
+  - opus_ELRC-4994-Latvian_Financial_MT/v1 #                  2,002 sentences
+  - opus_Tatoeba/v2023-04-12 #                                1,814 sentences
+  - opus_ELRC-3453-covid19.gov.lv/v1 #                          826 sentences
+  - opus_ELRC-3217-antibiotic/v1 #                              809 sentences
+  - opus_ELRC-3306-EUROPARL_covid/v1 #                          724 sentences
+  - opus_ELRC_2923/v1 #                                         580 sentences
+  - opus_ELRC-2741-vaccination/v1 #                             521 sentences
+  - opus_ELRC-vaccination/v1 #                                  521 sentences
+  - mtdata_ELRC-international_agreements-1-eng-lav
+  - mtdata_ELRC-rights_arrested-1-eng-lav
+  - mtdata_ELRC-letter_rights_persons_arrested_or_detained-1-eng-lav
+  - mtdata_ELRC-finance_economics_bank_latvia-1-eng-lav
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-lav
+  - mtdata_ELRC-wikipedia_health-1-eng-lav
+  - mtdata_ELRC-nteu_tierb-1-eng-lav
+  - mtdata_EU-eac_forms-1-eng-lav #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-lav #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-lav #                            ~524,054 sentences (59.2 MB)
+  # - mtdata_Statmt-newsdev_lven-2017-lav-eng - disabled: WMT17 newsdev is the dev set validated on via devtest sacrebleu_aug-mix_wmt17/dev; training on it leaks the dev set
+  # - mtdata_Statmt-newsdev_enlv-2017-eng-lav - disabled: WMT17 newsdev is the dev set validated on via devtest sacrebleu_aug-mix_wmt17/dev; training on it leaks the dev set
+  - mtdata_Tilde-eesc-2017-eng-lav #                    ~1,122,956 sentences (126.9 MB)
+  - mtdata_Tilde-ema-2016-eng-lav #                       ~231,439 sentences (26.2 MB)
+  - mtdata_Tilde-airbaltic-1-eng-lav #                      ~1,050 sentences (118.7 kB)
+  - mtdata_Tilde-fold-1-eng-lav #                          ~10,070 sentences (1.1 MB)
+  - mtdata_Tilde-rapid-2016-eng-lav #                     ~198,906 sentences (22.5 MB)
+
+  # The monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The monolingual data contains:
+  #   ~3,283,185 sentences
+  mono-trg:
+  - news-crawl_news.2015  #          ~1,274,336 sentences (144M)
+  - news-crawl_news.2016 #          ~1,017,699 sentences (115M)
+  - news-crawl_news.2017 #            ~991,150 sentences (112M)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-ro-spring-2024.yml b/configs/en-ro-spring-2024.yml
new file mode 100644
index 000000000..553250c0b
--- /dev/null
+++ b/configs/en-ro-spring-2024.yml
@@ -0,0 +1,219 @@
+# The initial configuration was generated using:
+# task config-generator -- en ro --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: en
+  trg: ro
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-ron
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt16/dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt16
+
+  # The training data contains:
+  #   174,698,415 sentences (sum of the opus_* entries below; mtdata estimates are not counted)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (55,607,023 sentences)
+  #  - opus_ELRA-W0308/v1 - not enough data  (92 sentences)
+  #  - opus_ELRC-648-Letter_rights_person/v1 - not enough data  (77 sentences)
+  #  - opus_ELRC-403-Rights_Arrested/v1 - not enough data  (24 sentences)
+  #  - opus_ELRA-W0301/v1 - not enough data  (21 sentences)
+  #  - opus_tldr-pages/v2023-08-29 - not enough data  (9 sentences)
+  #  - opus_ELRC-417-Swedish_Work_Environ/v1 - not enough data  (8 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-romanian_literature-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-romanian_wikipedia-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-romanian_news-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-eir_spos-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-eir_newsletter-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-eir-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-ron - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-ron - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-ron - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-eng-ron - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-ron - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-ron - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-ron - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-ron - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-ron - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-ron-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-ron_RO - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       55,607,023 sentences
+  - opus_OpenSubtitles/v2018 #                           50,693,226 sentences
+  - opus_ParaCrawl/v9 #                                  25,048,962 sentences
+  - opus_ELRC-EMEA/v1 #                                  13,648,577 sentences
+  - opus_CCAligned/v1 #                                  10,525,602 sentences
+  - opus_DGT/v2019 #                                      3,541,661 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   3,421,073 sentences
+  - opus_XLEnt/v1.2 #                                     3,337,016 sentences
+  - opus_TildeMODEL/v2018 #                               1,925,419 sentences
+  - opus_EMEA/v3 #                                          994,499 sentences
+  - opus_ELRC-2728-EMEA/v1 #                                783,742 sentences
+  - opus_WikiMatrix/v1 #                                    631,486 sentences
+  - opus_JRC-Acquis/v3.0 #                                  455,171 sentences
+  - opus_QED/v2.0a #                                        438,832 sentences
+  - opus_Europarl/v8 #                                      400,356 sentences
+  - opus_Wikipedia/v1.0 #                                   360,499 sentences
+  - opus_TED2020/v1 #                                       328,491 sentences
+  - opus_EUbookshop/v2 #                                    324,553 sentences
+  - opus_wikimedia/v20230407 #                              323,049 sentences
+  - opus_SETIMES/v2 #                                       213,047 sentences
+  - opus_NeuLab-TedTalks/v1 #                               196,122 sentences
+  - opus_TED2013/v1.1 #                                     158,483 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        153,650 sentences
+  - opus_Tanzil/v1 #                                        136,175 sentences
+  - opus_ELRC-492-Romanian_Wikipedia/v1 #                   132,230 sentences
+  - opus_ELRA-W0193/v1 #                                    132,229 sentences
+  - opus_KDE4/v2 #                                          114,741 sentences
+  - opus_ELRC-493-Romanian_news/v1 #                         98,099 sentences
+  - opus_ELRA-W0194/v1 #                                     98,098 sentences
+  - opus_ELITR-ECA/v1 #                                      92,826 sentences
+  - opus_bible-uedin/v1 #                                    62,195 sentences
+  - opus_PHP/v1 #                                            30,391 sentences
+  - opus_GNOME/v1 #                                          25,419 sentences
+  - opus_ELRC-3577-EUR_LEX_covid/v1 #                        23,183 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   23,183 sentences
+  - opus_ELRC-1177-EUIPO_2017/v1 #                           20,298 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                20,298 sentences
+  - opus_Tatoeba/v2023-04-12 #                               16,308 sentences
+  - opus_ELRC-wikipedia_health/v1 #                          13,252 sentences
+  - opus_ELRC-2887-EU_publications_medi/v1 #                 13,164 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,164 sentences
+  - opus_ELRC-930-studies_reports_stat/v1 #                  12,043 sentences
+  - opus_ELRA-W0270/v1 #                                     12,042 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            10,906 sentences
+  - opus_WMT-News/v2019 #                                     7,996 sentences
+  - opus_ELRC-3618-presscorner_covid/v1 #                     6,715 sentences
+  - opus_ELRC-435-Romanian_New_Crimina/v1 #                   6,496 sentences
+  - opus_ELRA-W0170/v1 #                                      6,495 sentences
+  - opus_ELRC-491-Romanian_literature/v1 #                    5,281 sentences
+  - opus_ELRA-W0192/v1 #                                      5,280 sentences
+  - opus_ELRC-1819-EIR/v1 #                                   4,994 sentences
+  - opus_GlobalVoices/v2018q4 #                               4,454 sentences
+  - opus_ELRC-1992-Rural_Development_Pr/v1 #                  4,186 sentences
+  - opus_ELRC-Rural_Development/v1 #                          4,186 sentences
+  - opus_ELRC-654-Romanian_Ombudsman_a/v1 #                   4,148 sentences
+  - opus_ELRA-W0206/v1 #                                      4,147 sentences
+  - opus_ELRC-1815-EIR_Newsletter/v1 #                        3,788 sentences
+  - opus_ELRC_3382/v1 #                                       3,674 sentences
+  - opus_ELRC-1814-EIR_SPOS/v1 #                              3,248 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,556 sentences
+  - opus_ELRC-3476-EC_EUROPA_covid/v1 #                       2,338 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,338 sentences
+  - opus_ELRC-3216-antibiotic/v1 #                            1,035 sentences
+  - opus_ELRC-antibiotic/v1 #                                 1,035 sentences
+  - opus_ELRC-3087-wikipedia_health/v1 #                        693 sentences
+  - opus_ELRC_2922/v1 #                                         692 sentences
+  - opus_ELRC-3305-EUROPARL_covid/v1 #                          546 sentences
+  - opus_ELRC-2750-vaccination/v1 #                             496 sentences
+  - opus_ELRC-vaccination/v1 #                                  496 sentences
+  - opus_ELRC_2923/v1 #                                         319 sentences
+  - mtdata_ELRC-rights_arrested-1-eng-ron
+  - mtdata_ELRC-swedish_work_environment-1-eng-ron
+  - mtdata_ELRC-romanian_new_criminal_procedure_code-1-eng-ron
+  - mtdata_ELRC-letter_rights_persons_arrested_or_detained-1-eng-ron
+  - mtdata_ELRC-romanian_ombudsman_archive-1-eng-ron
+  - mtdata_ELRC-studies_reports_statistical_culture_institute_cultural_research_training-1-eng-ron
+  - mtdata_ELRC-rural_development_programme_romania-1-eng-ron
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-ron
+  - mtdata_EU-eac_forms-1-eng-ron #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-ron #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-ron #                            ~389,297 sentences (44.0 MB)
+  # - mtdata_Neulab-tedtalks_test-1-eng-ron - disabled: held-out test split of the TED talks corpus (its _dev split is used in devtest); keep it out of training
+  # - mtdata_Statmt-newsdev_enro-2016-eng-ron - disabled: WMT16 newsdev is the dev set validated on via devtest sacrebleu_aug-mix_wmt16/dev; training on it leaks the dev set
+  # - mtdata_Statmt-newsdev_roen-2016-ron-eng - disabled: WMT16 newsdev is the dev set validated on via devtest sacrebleu_aug-mix_wmt16/dev; training on it leaks the dev set
+  - mtdata_Tilde-eesc-2017-eng-ron #                    ~1,026,056 sentences (115.9 MB)
+  - mtdata_Tilde-ema-2016-eng-ron #                       ~229,130 sentences (25.9 MB)
+  - mtdata_Tilde-ecb-2017-eng-ron #                         ~1,778 sentences (200.9 kB)
+  - mtdata_Tilde-rapid-2016-eng-ron #                     ~196,150 sentences (22.2 MB)
+  - mtdata_Tilde-worldbank-1-eng-ron #                      ~6,413 sentences (724.7 kB)
+
+  # The monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The monolingual data contains:
+  #   ~24,920,348 sentences
+  mono-trg:
+  - news-crawl_news.2015  #          ~1,088,495 sentences (123M)
+  - news-crawl_news.2016 #          ~2,061,946 sentences (233M)
+  - news-crawl_news.2017 #          ~2,247,787 sentences (254M)
+  - news-crawl_news.2018 #          ~1,345,132 sentences (152M)
+  - news-crawl_news.2019 #          ~3,283,185 sentences (371M)
+  - news-crawl_news.2020 #          ~3,982,300 sentences (450M)
+  - news-crawl_news.2021 #          ~3,353,982 sentences (379M)
+  - news-crawl_news.2022 #          ~3,831,858 sentences (433M)
+  - news-crawl_news.2023 #          ~3,725,663 sentences (421M)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-ru-spring-2024.yml b/configs/en-ru-spring-2024.yml
new file mode 100644
index 000000000..1f4a55075
--- /dev/null
+++ b/configs/en-ru-spring-2024.yml
@@ -0,0 +1,174 @@
+# The initial configuration was generated using:
+# task config-generator -- en ru --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: en
+  trg: ru
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-rus
+  - mtdata_UN-un_dev-1-eng-rus
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_mtedx/test
+  - sacrebleu_aug-mix_wmt20
+  - sacrebleu_aug-mix_wmt18
+  - sacrebleu_aug-mix_wmt17
+  - sacrebleu_aug-mix_wmt15
+  - sacrebleu_aug-mix_wmt14/full
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_mtedx/valid
+  - sacrebleu_wmt20/tworefs
+  - sacrebleu_wmt19
+  - sacrebleu_wmt18/test-ts
+  - sacrebleu_wmt16
+  - sacrebleu_wmt14
+  - sacrebleu_wmt13
+
+  # The training data contains:
+  #   250,111,081 sentences (sum of the opus_* entries below; mtdata estimates are not counted)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (139,937,785 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_ELRC-3855-SWPS_University_Soci/v1 - not enough data  (109 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - opus_WikiTitles/v3 - ignored datasets (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-eng-rus - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-rus - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-eng-rus - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-rus - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-1_bonus-eng-rus - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-14-eng-rus - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-15-eng-rus - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-16-eng-rus - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-rus_RU - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                      139,937,785 sentences
+  - opus_OpenSubtitles/v2018 #                           25,910,105 sentences
+  - opus_UNPC/v1.0 #                                     25,173,398 sentences
+  - opus_CCAligned/v1 #                                  13,850,305 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                  13,565,182 sentences
+  - opus_MultiUN/v1 #                                    11,654,416 sentences
+  - opus_XLEnt/v1.2 #                                     7,890,088 sentences
+  - opus_ParaCrawl/v9 #                                   5,378,016 sentences
+  - opus_WikiMatrix/v1 #                                  1,661,909 sentences
+  - opus_Tanzil/v1 #                                      1,067,840 sentences
+  - opus_Wikipedia/v1.0 #                                   572,717 sentences
+  - opus_QED/v2.0a #                                        563,700 sentences
+  - opus_wikimedia/v20230407 #                              541,583 sentences
+  - opus_Tatoeba/v2023-04-12 #                              540,675 sentences
+  - opus_TED2020/v1 #                                       390,015 sentences
+  - opus_News-Commentary/v16 #                              265,809 sentences
+  - opus_NeuLab-TedTalks/v1 #                               221,999 sentences
+  - opus_KDE4/v2 #                                          180,793 sentences
+  - opus_GlobalVoices/v2018q4 #                             170,351 sentences
+  - opus_TED2013/v1.1 #                                     133,660 sentences
+  - opus_ELRC-5183-SciPar_Ukraine/v1 #                      126,585 sentences
+  - opus_infopankki/v1 #                                     75,305 sentences
+  - opus_bible-uedin/v1 #                                    62,195 sentences
+  - opus_EUbookshop/v2 #                                     49,830 sentences
+  - opus_WMT-News/v2019 #                                    36,637 sentences
+  - opus_PHP/v1 #                                            30,064 sentences
+  - opus_Books/v1 #                                          17,496 sentences
+  - opus_TildeMODEL/v2018 #                                  10,977 sentences
+  - opus_MDN_Web_Docs/v2023-09-25 #                           8,134 sentences
+  - opus_ada83/v1 #                                           4,122 sentences
+  - opus_ELRC-3075-wikipedia_health/v1 #                      4,073 sentences
+  - opus_ELRC-wikipedia_health/v1 #                           4,073 sentences
+  - opus_ELRC_2922/v1 #                                       4,072 sentences
+  - opus_tico-19/v2020-10-28 #                                3,071 sentences
+  - opus_ELRC-5067-SciPar/v1 #                                3,064 sentences
+  - opus_tldr-pages/v2023-08-29 #                             1,037 sentences
+  # - mtdata_Neulab-tedtalks_test-1-eng-rus - disabled: held-out test split of the TED talks corpus (its _dev split is used in devtest); keep it out of training
+  - mtdata_Statmt-commoncrawl_wmt13-1-rus-eng #         ~8,126,649 sentences (918.3 MB)
+  - mtdata_Statmt-news_commentary_wmt18-13-rus-eng #    ~1,001,393 sentences (113.2 MB)
+  - mtdata_Statmt-wiki_titles-1-rus-eng #                 ~179,637 sentences (20.3 MB)
+  - mtdata_Statmt-wiki_titles-2-rus-eng #                 ~193,345 sentences (21.8 MB)
+  - mtdata_Tilde-airbaltic-1-eng-rus #                      ~1,288 sentences (145.6 kB)
+  - mtdata_Tilde-czechtourism-1-eng-rus #                   ~7,561 sentences (854.5 kB)
+  - mtdata_Tilde-worldbank-1-eng-rus #                     ~33,049 sentences (3.7 MB)
+  # - mtdata_UN-un_test-1-eng-rus - disabled: held-out test split of the UN corpus (its _dev split is used in devtest); keep it out of training
+
+  # The monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The monolingual data contains:
+  #   ~90,385,836 sentences
+  mono-trg:
+  - news-crawl_news.2008  #             ~19,469 sentences (2.2M)
+  - news-crawl_news.2009 #             ~47,787 sentences (5.4M)
+  - news-crawl_news.2011 #          ~4,876,106 sentences (551M)
+  - news-crawl_news.2012 #          ~5,079,646 sentences (574M)
+  - news-crawl_news.2013 #          ~7,327,433 sentences (828M)
+  - news-crawl_news.2014 #          ~6,194,690 sentences (700M)
+  - news-crawl_news.2015 #          ~5,433,628 sentences (614M)
+  - news-crawl_news.2016 #          ~3,716,814 sentences (420M)
+  - news-crawl_news.2017 #          ~4,451,327 sentences (503M)
+  - news-crawl_news.2018 #          ~4,539,823 sentences (513M)
+  - news-crawl_news.2019 #          ~6,955,752 sentences (786M)
+  - news-crawl_news.2020 #          ~8,849,557 sentences (1.0G)
+  - news-crawl_news.2021 #          ~8,115,044 sentences (917M)
+  - news-crawl_news.2022 #          ~8,849,557 sentences (1.0G)
+  - news-crawl_news.2023 #         ~15,929,203 sentences (1.8G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/en-sk-spring-2024.yml b/configs/en-sk-spring-2024.yml
new file mode 100644
index 000000000..2ee3a729d
--- /dev/null
+++ b/configs/en-sk-spring-2024.yml
@@ -0,0 +1,180 @@
+# The initial configuration was generated using:
+# task config-generator -- en sk --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # Run identity and pipeline settings for the en -> sk pair (see src/trg below).
+  name: spring-2024
+  src: en
+  trg: sk
+  best-model: chrf
+  use-opuscleaner: 'true'  # Quoted: parsed as the string 'true', not a YAML boolean.
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # Empty map: no per-dataset thresholds are set.
+  mono-max-sentences-src: 500_000_000  # "_" separators: int under YAML 1.1 loaders, string under strict YAML 1.2.
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # Empty map: no pretrained models are configured.
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-slk
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   111,168,672 sentences (opus_* entries only; mtdata_* entries are not counted)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (38,096,241 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_ELRC-3076-wikipedia_health/v1 - not enough data  (134 sentences)
+  #  - opus_ELRC-wikipedia_health/v1 - not enough data  (134 sentences)
+  #  - opus_ELRC_2922/v1 - not enough data  (133 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-culture_slovak-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-justice_slovak-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-eng-slk - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-slk - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-slk - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-slk - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-slk - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-slk - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-slk - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-slk - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-slk-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-slk_SK - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-eng-slk - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       38,096,241 sentences
+  - opus_ParaCrawl/v9 #                                  22,902,149 sentences
+  - opus_ELRC-EMEA/v1 #                                  12,032,452 sentences
+  - opus_OpenSubtitles/v2018 #                            8,850,871 sentences
+  - opus_ELRC-4154-NTEU_TierA/v1 #                        7,922,512 sentences
+  - opus_CCAligned/v1 #                                   6,938,181 sentences
+  - opus_DGT/v2019 #                                      5,118,830 sentences
+  - opus_XLEnt/v1.2 #                                     2,594,162 sentences
+  - opus_TildeMODEL/v2018 #                               2,190,889 sentences
+  - opus_EMEA/v3 #                                        1,054,178 sentences
+  - opus_ELRC-2721-EMEA/v1 #                                780,098 sentences
+  - opus_Europarl/v8 #                                      639,958 sentences
+  - opus_EUbookshop/v2 #                                    452,097 sentences
+  - opus_ELITR-ECA/v1 #                                     294,356 sentences
+  - opus_WikiMatrix/v1 #                                    178,985 sentences
+  - opus_QED/v2.0a #                                        173,727 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        142,656 sentences
+  - opus_ECB/v1 #                                           122,131 sentences
+  - opus_TED2020/v1 #                                       106,067 sentences
+  - opus_KDE4/v2 #                                          105,425 sentences
+  - opus_NeuLab-TedTalks/v1 #                                67,607 sentences
+  - opus_bible-uedin/v1 #                                    62,159 sentences
+  - opus_ELRC-5067-SciPar/v1 #                               60,468 sentences
+  - opus_JRC-Acquis/v3.0 #                                   35,744 sentences
+  - opus_PHP/v1 #                                            31,173 sentences
+  - opus_ELRC-3570-EUR_LEX_covid/v1 #                        22,479 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,479 sentences
+  - opus_wikimedia/v20230407 #                               18,819 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            16,369 sentences
+  - opus_ELRC-1179-EUIPO_2017/v1 #                           16,313 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                16,313 sentences
+  - opus_ELRC-2880-EU_publications_medi/v1 #                 12,927 sentences
+  - opus_ELRC-EU_publications/v1 #                           12,927 sentences
+  - opus_EUconst/v1 #                                        10,119 sentences
+  - opus_ELRC-1072-annual_reports_immig/v1 #                  8,041 sentences
+  - opus_ELRA-W0136/v1 #                                      8,040 sentences
+  - opus_ELRC-3611-presscorner_covid/v1 #                     6,982 sentences
+  - opus_ELRC-1073-annual_reports_Slova/v1 #                  6,008 sentences
+  - opus_ELRA-W0137/v1 #                                      6,007 sentences
+  - opus_ELRC-1074-annual_reports_Stati/v1 #                  5,614 sentences
+  - opus_ELRC_3382/v1 #                                       3,624 sentences
+  - opus_ELRC-488-Justice_Slovak/v1 #                         2,896 sentences
+  - opus_ELRA-W0189/v1 #                                      2,895 sentences
+  - opus_ELRC-487-Culture_Slovak/v1 #                         2,610 sentences
+  - opus_ELRA-W0188/v1 #                                      2,609 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,526 sentences
+  - opus_ELRC-3469-EC_EUROPA_covid/v1 #                       2,409 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,409 sentences
+  - opus_ELRC-3209-antibiotic/v1 #                            1,010 sentences
+  - opus_ELRC-antibiotic/v1 #                                 1,010 sentences
+  - opus_ELRC-3298-EUROPARL_covid/v1 #                          653 sentences
+  - opus_ELRC-2745-vaccination/v1 #                             510 sentences
+  - opus_ELRC-vaccination/v1 #                                  510 sentences
+  - opus_ELRC_2923/v1 #                                         448 sentences
+  - mtdata_ELRC-annual_reports_immigration_asylum_policies_emn_contact_point_slovak-1-eng-slk
+  - mtdata_ELRC-annual_reports_slovak_centre_human_rights-1-eng-slk
+  - mtdata_ELRC-annual_reports_statistical_slovak-1-eng-slk
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-slk
+  - mtdata_ELRC-wikipedia_health-1-eng-slk
+  - mtdata_ELRC-nteu_tierb-1-eng-slk
+  - mtdata_EU-eac_forms-1-eng-slk #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-slk #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-slk #                            ~548,757 sentences (62.0 MB)
+  # - mtdata_Neulab-tedtalks_test-1-eng-slk - test split, excluded from training (devtest uses the matching dev split)
+  - mtdata_Tilde-eesc-2017-eng-slk #                    ~1,269,685 sentences (143.5 MB)
+  - mtdata_Tilde-ema-2016-eng-slk #                       ~238,237 sentences (26.9 MB)
+  - mtdata_Tilde-rapid-2016-eng-slk #                     ~214,164 sentences (24.2 MB)
+
+  # The source-language (en) monolingual data (mono-src) contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-language (sk) monolingual data (mono-trg) contains:
+  #   ~0 sentences
+  mono-trg: []
+marian-args:  # Per-stage option overrides; presumably passed to Marian for each stage — confirm.
+  decoding-backward:
+    beam-size: '12'  # Numeric values in this section are written as quoted strings.
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'  # Twice the decoding-backward value above.
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # Lower than the '20' used by the teacher and student stages below.
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # Unquoted, so parsed as a YAML boolean.
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot  # NOTE(review): name suggests GCP spot (preemptible) workers — confirm.
diff --git a/configs/en-sl-spring-2024.yml b/configs/en-sl-spring-2024.yml
new file mode 100644
index 000000000..26db44623
--- /dev/null
+++ b/configs/en-sl-spring-2024.yml
@@ -0,0 +1,182 @@
+# The initial configuration was generated using:
+# task config-generator -- en sl --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # Run identity and pipeline settings for the en -> sl pair (see src/trg below).
+  name: spring-2024
+  src: en
+  trg: sl
+  best-model: chrf
+  use-opuscleaner: 'true'  # Quoted: parsed as the string 'true', not a YAML boolean.
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # Empty map: no per-dataset thresholds are set.
+  mono-max-sentences-src: 500_000_000  # "_" separators: int under YAML 1.1 loaders, string under strict YAML 1.2.
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # Empty map: no pretrained models are configured.
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-slv
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   89,057,699 sentences (opus_* entries only; mtdata_* entries are not counted)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (27,406,782 sentences)
+  #  - opus_MultiMaCoCu/v2 - ignored datasets (1,875,517 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-euipo_2017-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-slv - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-slv - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-slv - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-slv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-slv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-slv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-slv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-slv - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-slv-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-slv_SI - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-eng-slv - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       27,406,782 sentences
+  - opus_OpenSubtitles/v2018 #                           19,641,457 sentences
+  - opus_ELRC-EMEA/v1 #                                  13,288,992 sentences
+  - opus_ParaCrawl/v9 #                                   9,516,259 sentences
+  - opus_DGT/v2019 #                                      5,125,455 sentences
+  - opus_CCAligned/v1 #                                   4,366,555 sentences
+  - opus_TildeMODEL/v2018 #                               2,048,216 sentences
+  - opus_MaCoCu/v2 #                                      1,875,518 sentences
+  - opus_EMEA/v3 #                                        1,045,041 sentences
+  - opus_XLEnt/v1.2 #                                       861,509 sentences
+  - opus_ELRC-2727-EMEA/v1 #                                766,139 sentences
+  - opus_Europarl/v8 #                                      624,803 sentences
+  - opus_EUbookshop/v2 #                                    405,653 sentences
+  - opus_WikiMatrix/v1 #                                    318,028 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              300,017 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        150,090 sentences
+  - opus_Wikipedia/v1.0 #                                   140,124 sentences
+  - opus_KDE4/v2 #                                          119,645 sentences
+  - opus_ELITR-ECA/v1 #                                     102,934 sentences
+  - opus_ECB/v1 #                                            89,634 sentences
+  - opus_QED/v2.0a #                                         79,196 sentences
+  - opus_wikimedia/v20230407 #                               73,612 sentences
+  - opus_ELRC-490-Secretariat_General_/v1 #                  63,070 sentences
+  - opus_ELRA-W0191/v1 #                                     63,069 sentences
+  - opus_bible-uedin/v1 #                                    62,195 sentences
+  - opus_ELRC-489-Secretariat_General_/v1 #                  55,185 sentences
+  - opus_ELRA-W0190/v1 #                                     55,184 sentences
+  - opus_JRC-Acquis/v3.0 #                                   53,390 sentences
+  - opus_TED2020/v1 #                                        44,340 sentences
+  - opus_PHP/v1 #                                            30,550 sentences
+  - opus_NeuLab-TedTalks/v1 #                                22,856 sentences
+  - opus_ELRC-3576-EUR_LEX_covid/v1 #                        22,381 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,381 sentences
+  - opus_ELRC-1180-EUIPO_2017/v1 #                           19,767 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                19,767 sentences
+  - opus_ELRC-wikipedia_health/v1 #                          17,519 sentences
+  - opus_ELRC-antibiotic/v1 #                                16,166 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            15,269 sentences
+  - opus_TED2013/v1.1 #                                      14,960 sentences
+  - opus_ELRC-2886-EU_publications_medi/v1 #                 13,209 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,209 sentences
+  - opus_ELRC-924-statistical_reports_/v1 #                  11,860 sentences
+  - opus_ELRC-statistical_reports/v1 #                       11,860 sentences
+  - opus_ELRA-W0267/v1 #                                     11,859 sentences
+  - opus_EUconst/v1 #                                         8,807 sentences
+  - opus_GNOME/v1 #                                           8,070 sentences
+  - opus_ELRC-3617-presscorner_covid/v1 #                     6,995 sentences
+  - opus_ELRC-923-chapters_Youth_2010/v1 #                    5,866 sentences
+  - opus_Tatoeba/v2023-04-12 #                                4,302 sentences
+  - opus_ELRC_3382/v1 #                                       3,633 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,546 sentences
+  - opus_ELRC-3475-EC_EUROPA_covid/v1 #                       2,534 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,534 sentences
+  - opus_ELRC-2328-Agriculture_Forestry/v1 #                  1,551 sentences
+  - opus_ELRC-3215-antibiotic/v1 #                              986 sentences
+  - opus_ELRC-3085-wikipedia_health/v1 #                        960 sentences
+  - opus_ELRC_2922/v1 #                                         959 sentences
+  - opus_ELRC-3304-EUROPARL_covid/v1 #                          816 sentences
+  - opus_ELRC-2737-vaccination/v1 #                             492 sentences
+  - opus_ELRC-vaccination/v1 #                                  492 sentences
+  - opus_ELRC_2923/v1 #                                         451 sentences
+  - mtdata_ELRC-secretariat_general_part1-1-eng-slv
+  - mtdata_ELRC-secretariat_general_part2-1-eng-slv
+  - mtdata_ELRC-chapters_youth_2010_social_profile_young_people_slovenia_publication-1-eng-slv
+  - mtdata_ELRC-statistical_reports_statistical_slovenia-1-eng-slv
+  - mtdata_ELRC-agriculture_forestry_food_slovenia-1-eng-slv
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-slv
+  - mtdata_EU-eac_forms-1-eng-slv #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-slv #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-slv #                            ~539,490 sentences (61.0 MB)
+  # - mtdata_Neulab-tedtalks_test-1-eng-slv - test split, excluded from training (devtest uses the matching dev split)
+  - mtdata_Tilde-eesc-2017-eng-slv #                    ~1,116,707 sentences (126.2 MB)
+  - mtdata_Tilde-ema-2016-eng-slv #                       ~223,681 sentences (25.3 MB)
+  - mtdata_Tilde-rapid-2016-eng-slv #                     ~203,695 sentences (23.0 MB)
+
+  # The source-language (en) monolingual data (mono-src) contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-language (sl) monolingual data (mono-trg) contains:
+  #   ~0 sentences
+  mono-trg: []
+marian-args:  # Per-stage option overrides; presumably passed to Marian for each stage — confirm.
+  decoding-backward:
+    beam-size: '12'  # Numeric values in this section are written as quoted strings.
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'  # Twice the decoding-backward value above.
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # Lower than the '20' used by the teacher and student stages below.
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # Unquoted, so parsed as a YAML boolean.
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot  # NOTE(review): name suggests GCP spot (preemptible) workers — confirm.
diff --git a/configs/en-sr-spring-2024.yml b/configs/en-sr-spring-2024.yml
new file mode 100644
index 000000000..6de158fae
--- /dev/null
+++ b/configs/en-sr-spring-2024.yml
@@ -0,0 +1,131 @@
+# The initial configuration was generated using:
+# task config-generator -- en sr --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # Run identity and pipeline settings for the en -> sr pair (see src/trg below).
+  name: spring-2024
+  src: en
+  trg: sr
+  best-model: chrf
+  use-opuscleaner: 'true'  # Quoted: parsed as the string 'true', not a YAML boolean.
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # Empty map: no per-dataset thresholds are set.
+  mono-max-sentences-src: 500_000_000  # "_" separators: int under YAML 1.1 loaders, string under strict YAML 1.2.
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # Empty map: no pretrained models are configured.
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-srp
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   78,565,711 sentences (opus_* entries only; mtdata_* entries are not counted)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (26,510,872 sentences)
+  #  - opus_MultiHPLT/v1.1 - ignored datasets (3,904,384 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_ELRC-416-Swedish_Social_Secur/v1 - not enough data  (114 sentences)
+  #  - opus_ELRC-416-Swedish_Social_Secur/v1 - not enough data  (113 sentences; NOTE(review): same name as the entry above, different count — verify)
+  #  - opus_tldr-pages/v2023-08-29 - not enough data  (26 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-eng-srp - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-srp - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-srp - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-srp_RS - duplicate with opus
+  train:
+  - opus_OpenSubtitles/v2018  #                           42,635,098 sentences
+  - opus_NLLB/v1 #                                       26,510,872 sentences
+  - opus_HPLT/v1.1 #                                      3,904,423 sentences
+  - opus_CCAligned/v1 #                                   1,992,692 sentences
+  - opus_XLEnt/v1.2 #                                     1,474,447 sentences
+  - opus_WikiMatrix/v1 #                                    395,569 sentences
+  - opus_GoURMET/v1 #                                       329,004 sentences
+  - opus_QED/v2.0a #                                        284,942 sentences
+  - opus_TED2020/v1 #                                       260,966 sentences
+  - opus_SETIMES/v2 #                                       225,169 sentences
+  - opus_wikimedia/v20230407 #                              217,199 sentences
+  - opus_NeuLab-TedTalks/v1 #                               152,477 sentences
+  - opus_bible-uedin/v1 #                                    62,131 sentences
+  - opus_KDE4/v2 #                                           60,827 sentences
+  - opus_Tatoeba/v2023-04-12 #                               21,760 sentences
+  - opus_GlobalVoices/v2018q4 #                              20,309 sentences
+  - opus_ELRC-wikipedia_health/v1 #                          12,707 sentences
+  - opus_TildeMODEL/v2018 #                                   2,024 sentences
+  - opus_EUbookshop/v2 #                                      1,608 sentences
+  - opus_ELRC-3041-wikipedia_health/v1 #                        744 sentences
+  - opus_ELRC_2922/v1 #                                         743 sentences
+  - mtdata_ELRC-swedish_social_security-1-eng-srp
+  # - mtdata_Neulab-tedtalks_test-1-eng-srp - test split, excluded from training (devtest uses the matching dev split)
+  - mtdata_Tilde-worldbank-1-eng-srp #                      ~2,533 sentences (286.3 kB)
+
+  # The source-language (en) monolingual data (mono-src) contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-language (sr) monolingual data (mono-trg) contains:
+  #   ~35,920,209 sentences
+  mono-trg:
+  - news-crawl_news.2008  #              ~3,522 sentences (398K)
+  - news-crawl_news.2009 #             ~18,584 sentences (2.1M)
+  - news-crawl_news.2010 #              ~9,734 sentences (1.1M)
+  - news-crawl_news.2011 #              ~2,530 sentences (286K)
+  - news-crawl_news.2018 #             ~18,584 sentences (2.1M)
+  - news-crawl_news.2019 #          ~1,929,203 sentences (218M)
+  - news-crawl_news.2020 #          ~5,619,469 sentences (635M)
+  - news-crawl_news.2021 #          ~8,849,557 sentences (1.0G)
+  - news-crawl_news.2022 #          ~9,734,513 sentences (1.1G)
+  - news-crawl_news.2023 #          ~9,734,513 sentences (1.1G)
+marian-args:  # Per-stage option overrides; presumably passed to Marian for each stage — confirm.
+  decoding-backward:
+    beam-size: '12'  # Numeric values in this section are written as quoted strings.
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'  # Twice the decoding-backward value above.
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # Lower than the '20' used by the teacher and student stages below.
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # Unquoted, so parsed as a YAML boolean.
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot  # NOTE(review): name suggests GCP spot (preemptible) workers — confirm.
diff --git a/configs/en-sv-spring-2024.yml b/configs/en-sv-spring-2024.yml
new file mode 100644
index 000000000..59db07584
--- /dev/null
+++ b/configs/en-sv-spring-2024.yml
@@ -0,0 +1,239 @@
+# The initial configuration was generated using:
+# task config-generator -- en sv --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # Experiment settings for the en -> sv pair (see src/trg below).
+  name: spring-2024  # same value as the --name argument in the generator command above
+  src: en
+  trg: sv
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted, so this loads as the string 'true', not a YAML boolean
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping: no per-dataset thresholds are set here
+  mono-max-sentences-src: 500_000_000  # 500 million; '_' separators are read as an int by YAML 1.1 loaders (e.g. PyYAML)
+  mono-max-sentences-trg: 200_000_000  # 200 million (same '_' caveat)
+  spm-sample-size: 10_000_000  # 10 million (same '_' caveat)
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:
+  devtest:
+  - mtdata_Lindat-khresmoi_summary_dev-2-eng-swe
+  - mtdata_Neulab-tedtalks_dev-1-eng-swe
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   211,400,324 sentences
+  # 
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (77,008,059 sentences)
+  #  - opus_RF/v1 - not enough data  (180 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-swedish_labour_part2-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-swedish_labour_part1-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-swedish_food-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.norden.org-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.sida.se-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.visitestonia.com-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.vtv.fi-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-valtioneuvosto.fi-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-vnk.fi-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.turku.fi-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.vero.fi-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-eng-swe - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-swe - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-swe - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-eng-swe - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-swe - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-swe - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-swe - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-swe - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-swe - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-swe-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-swe_SE - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       77,008,059 sentences
+  - opus_ParaCrawl/v9 #                                  49,110,322 sentences
+  - opus_OpenSubtitles/v2018 #                           17,660,152 sentences
+  - opus_ELRC-4268-NTEU_TierA/v1 #                       12,737,597 sentences
+  - opus_CCAligned/v1 #                                  12,544,114 sentences
+  - opus_ELRC-EMEA/v1 #                                  12,083,941 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   8,058,690 sentences
+  - opus_DGT/v2019 #                                      5,139,521 sentences
+  - opus_XLEnt/v1.2 #                                     3,674,011 sentences
+  - opus_TildeMODEL/v2018 #                               3,234,207 sentences
+  - opus_EUbookshop/v2 #                                  1,915,479 sentences
+  - opus_Europarl/v8 #                                    1,892,723 sentences
+  - opus_EMEA/v3 #                                        1,086,217 sentences
+  - opus_JRC-Acquis/v3.0 #                                  792,924 sentences
+  - opus_ELRC-2725-EMEA/v1 #                                759,846 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              670,816 sentences
+  - opus_WikiMatrix/v1 #                                    546,289 sentences
+  - opus_ELITR-ECA/v1 #                                     389,808 sentences
+  - opus_KDE4/v2 #                                          232,485 sentences
+  - opus_QED/v2.0a #                                        171,126 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        147,973 sentences
+  - opus_Tanzil/v1 #                                        127,493 sentences
+  - opus_ELRC-Swedish_Migration/v1 #                        124,398 sentences
+  - opus_TED2020/v1 #                                       120,718 sentences
+  - opus_ELRC-www.turku.fi/v1 #                             107,773 sentences
+  - opus_NeuLab-TedTalks/v1 #                                69,332 sentences
+  - opus_wikimedia/v20230407 #                               63,135 sentences
+  - opus_bible-uedin/v1 #                                    62,137 sentences
+  - opus_infopankki/v1 #                                     51,749 sentences
+  - opus_ELRC-1770-valtioneuvosto.fi/v1 #                    49,084 sentences
+  - opus_ELRC-valtioneuvosto.fi/v1 #                         49,084 sentences
+  - opus_ELRC-1133-www.vtv.fi/v1 #                           46,501 sentences
+  - opus_ELRC-www.vtv.fi/v1 #                                46,501 sentences
+  - opus_ELRC-734-www.norden.org/v1 #                        37,763 sentences
+  - opus_ELRC-www.norden.org/v1 #                            37,763 sentences
+  - opus_ELRC-1772-vnk.fi/v1 #                               33,627 sentences
+  - opus_ELRC-vnk.fi/v1 #                                    33,627 sentences
+  - opus_WikiSource/v1 #                                     33,283 sentences
+  - opus_ELRC-817-Swedish_Audit_Riksre/v1 #                  30,352 sentences
+  - opus_PHP/v1 #                                            30,198 sentences
+  - opus_Tatoeba/v2023-04-12 #                               27,050 sentences
+  - opus_ELRC-3574-EUR_LEX_covid/v1 #                        22,445 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,445 sentences
+  - opus_ELRC-2037-www.vero.fi/v1 #                          22,317 sentences
+  - opus_ELRC-www.vero.fi/v1 #                               22,317 sentences
+  - opus_ELRC-2026-EUIPO_2017/v1 #                           16,947 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                16,947 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            15,681 sentences
+  - opus_ELRC-antibiotic/v1 #                                15,452 sentences
+  - opus_ELRC-1131-www.visitestonia.com/v1 #                 14,335 sentences
+  - opus_ELRC-www.visitestonia.com/v1 #                      14,335 sentences
+  - opus_ELRC-2884-EU_publications_medi/v1 #                 13,100 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,100 sentences
+  - opus_ELRC-802-Swedish_Competition_/v1 #                  11,374 sentences
+  - opus_ELRC-wikipedia_health/v1 #                          10,395 sentences
+  - opus_ELRC-928-Annual_Reports_Swedi/v1 #                  10,227 sentences
+  - opus_EUconst/v1 #                                         9,954 sentences
+  - opus_ELRC-2033-www.turku.fi/v1 #                          9,706 sentences
+  - opus_GlobalVoices/v2018q4 #                               8,793 sentences
+  - opus_ELRC-829-Swedish_Migration_Bo/v1 #                   8,366 sentences
+  - opus_ELRA-W0239/v1 #                                      8,365 sentences
+  - opus_ELRC-417-Swedish_Work_Environ/v1 #                   7,475 sentences
+  - opus_ELRC-3615-presscorner_covid/v1 #                     6,856 sentences
+  - opus_ELRC-744-Finnish_Information_/v1 #                   6,819 sentences
+  - opus_ELRC-Finnish_Information/v1 #                        6,819 sentences
+  - opus_ELRA-W0222/v1 #                                      6,818 sentences
+  - opus_ELRC_3382/v1 #                                       3,760 sentences
+  - opus_Books/v1 #                                           3,095 sentences
+  - opus_ELRC-Swedish_Labour/v1 #                             2,778 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,528 sentences
+  - opus_ELRC-1013-Sweden_a_Pocket/v1 #                       2,200 sentences
+  - opus_ELRA-W0130/v1 #                                      2,199 sentences
+  - opus_ELRC-712-Social_Insurance_Frs/v1 #                   1,953 sentences
+  - opus_ELRA-W0213/v1 #                                      1,952 sentences
+  - opus_ELRC-3473-EC_EUROPA_covid/v1 #                       1,858 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  1,858 sentences
+  - opus_ELRC-401-Swedish_Labour_Part2/v1 #                   1,768 sentences
+  - opus_ELRC-929-www.sida.se/v1 #                            1,545 sentences
+  - opus_ELRC-823-Swedish_Swedish_Crim/v1 #                   1,503 sentences
+  - opus_ELRC-416-Swedish_Social_Secur/v1 #                   1,447 sentences
+  - opus_ELRC-416-Swedish_Social_Secur/v1 #                   1,446 sentences  NOTE(review): same dataset name as the previous entry; confirm whether this is a duplicate
+  - opus_ELRC-436-Swedish_Food/v1 #                           1,147 sentences
+  - opus_ELRA-W0305/v1 #                                      1,146 sentences
+  - opus_ELRC-406-Swedish_Labour_Part1/v1 #                   1,011 sentences
+  - opus_ELRC-3213-antibiotic/v1 #                              953 sentences
+  - opus_ELRC-830-Swedish_Economic_Reg/v1 #                     949 sentences
+  - opus_ELRC-3302-EUROPARL_covid/v1 #                          844 sentences
+  - opus_tldr-pages/v2023-08-29 #                               566 sentences
+  - opus_ELRC-3082-wikipedia_health/v1 #                        535 sentences
+  - opus_ELRC_2922/v1 #                                         534 sentences
+  - opus_ELRC_2923/v1 #                                         499 sentences
+  - opus_ELRC-2752-vaccination/v1 #                             497 sentences
+  - opus_ELRC-vaccination/v1 #                                  497 sentences
+  - mtdata_ELRC-swedish_social_security-1-eng-swe
+  - mtdata_ELRC-swedish_work_environment-1-eng-swe
+  - mtdata_ELRC-social_insurance_försäkringskassan-1-eng-swe
+  - mtdata_ELRC-finnish_information_bank-1-eng-swe
+  - mtdata_ELRC-swedish_competition_authority_konkurrensverket-1-eng-swe
+  - mtdata_ELRC-swedish_audit_riksrevisionen-1-eng-swe
+  - mtdata_ELRC-swedish_swedish_crime_victim_compensation_support_authority-1-eng-swe
+  - mtdata_ELRC-swedish_migration_board_migrationsverket-1-eng-swe
+  - mtdata_ELRC-swedish_economic_regional_growth_tillväxtverket-1-eng-swe
+  - mtdata_ELRC-annual_reports_swedish_pension_system-1-eng-swe
+  - mtdata_ELRC-sweden_a_pocket_guide_book-1-eng-swe
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-swe
+  - mtdata_ELRC-nteu_tierb-1-eng-swe
+  - mtdata_EU-eac_forms-1-eng-swe #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-swe #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-swe #                            ~980,674 sentences (110.8 MB)
+  - mtdata_Lindat-khresmoi_summary_test-2-eng-swe #        ~11,808 sentences (1.3 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-swe #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Statmt-dcep_wmt17-1-swe-eng #                ~1,137,607 sentences (128.5 MB)
+  - mtdata_Statmt-books_wmt17-1-swe-eng #                   ~2,797 sentences (316.2 kB)
+  - mtdata_Tilde-eesc-2017-eng-swe #                    ~1,798,328 sentences (203.2 MB)
+  - mtdata_Tilde-ema-2016-eng-swe #                       ~215,912 sentences (24.4 MB)
+  - mtdata_Tilde-ecb-2017-eng-swe #                         ~3,314 sentences (374.5 kB)
+  - mtdata_Tilde-rapid-2016-eng-swe #                     ~400,648 sentences (45.3 MB)
+
+  # The source-language (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-language (sv) monolingual data contains:
+  #   ~0 sentences
+  mono-trg: []
+marian-args:  # Arguments grouped by decoding/training stage; numeric values are quoted, so they load as strings.
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # lower than the '20' used by the teacher and student stages below
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # unquoted, so this is a real YAML boolean (contrast with the quoted use-opuscleaner: 'true')
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot  # only a default worker class is set; no per-task entries
diff --git a/configs/en-tr-spring-2024.yml b/configs/en-tr-spring-2024.yml
new file mode 100644
index 000000000..816433fde
--- /dev/null
+++ b/configs/en-tr-spring-2024.yml
@@ -0,0 +1,144 @@
+# The initial configuration was generated using:
+# task config-generator -- en tr --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # Experiment settings for the en -> tr pair (see src/trg below).
+  name: spring-2024  # same value as the --name argument in the generator command above
+  src: en
+  trg: tr
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted, so this loads as the string 'true', not a YAML boolean
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping: no per-dataset thresholds are set here
+  mono-max-sentences-src: 500_000_000  # 500 million; '_' separators are read as an int by YAML 1.1 loaders (e.g. PyYAML)
+  mono-max-sentences-trg: 200_000_000  # 200 million (same '_' caveat)
+  spm-sample-size: 10_000_000  # 10 million (same '_' caveat)
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-tur
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt18/test-ts
+  - sacrebleu_aug-mix_wmt16
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt18
+  - sacrebleu_wmt17
+  - sacrebleu_wmt16/dev
+
+  # The training data contains:
+  #   121,323,758 sentences
+  # 
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (47,045,956 sentences)
+  #  - opus_MultiMaCoCu/v2 - ignored datasets (1,646,740 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-eng-tur - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-tur - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-eng-tur - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-tur - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-tur_TR - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       47,045,956 sentences
+  - opus_OpenSubtitles/v2018 #                           44,986,121 sentences
+  - opus_CCAligned/v1 #                                  13,650,311 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   3,915,248 sentences
+  - opus_XLEnt/v1.2 #                                     3,809,464 sentences
+  - opus_MaCoCu/v2 #                                      1,646,741 sentences
+  - opus_GoURMET/v1 #                                     1,308,307 sentences
+  - opus_Tanzil/v1 #                                      1,189,967 sentences
+  - opus_Tatoeba/v2023-04-12 #                              676,920 sentences
+  - opus_wikimedia/v20230407 #                              668,099 sentences
+  - opus_QED/v2.0a #                                        482,964 sentences
+  - opus_WikiMatrix/v1 #                                    477,736 sentences
+  - opus_TED2020/v1 #                                       378,033 sentences
+  - opus_SETIMES/v2 #                                       207,678 sentences
+  - opus_NeuLab-TedTalks/v1 #                               195,641 sentences
+  - opus_Wikipedia/v1.0 #                                   159,979 sentences
+  - opus_KDE4/v2 #                                          153,438 sentences
+  - opus_TED2013/v1.1 #                                     137,028 sentences
+  - opus_bible-uedin/v1 #                                    60,411 sentences
+  - opus_infopankki/v1 #                                     44,030 sentences
+  - opus_Bianet/v1 #                                         34,770 sentences
+  - opus_PHP/v1 #                                            32,713 sentences
+  - opus_EUbookshop/v2 #                                     23,706 sentences
+  - opus_WMT-News/v2019 #                                    20,016 sentences
+  - opus_GlobalVoices/v2018q4 #                               7,838 sentences
+  - opus_ELRC-3057-wikipedia_health/v1 #                      2,368 sentences
+  - opus_ELRC-wikipedia_health/v1 #                           2,368 sentences
+  - opus_ELRC_2922/v1 #                                       2,367 sentences
+  - opus_tldr-pages/v2023-08-29 #                             1,956 sentences
+  - opus_TildeMODEL/v2018 #                                   1,584 sentences
+  - mtdata_EU-eac_forms-1-eng-tur #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-tur #                    ~31,162 sentences (3.5 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-tur #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Statmt-newsdev_tren-2016-tur-eng #             ~402,756 sentences (45.5 MB)  NOTE(review): WMT16 newsdev; may overlap sacrebleu_wmt16/dev in the test list above - confirm
+  - mtdata_Statmt-newsdev_entr-2016-eng-tur #             ~402,756 sentences (45.5 MB)  NOTE(review): WMT16 newsdev; may overlap sacrebleu_wmt16/dev in the test list above - confirm
+  - mtdata_Tilde-worldbank-1-eng-tur #                      ~1,827 sentences (206.5 kB)
+
+  # The source-language (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-language (tr) monolingual data contains:
+  #   ~20,230,124 sentences
+  mono-trg:
+  - news-crawl_news.2010  #                 ~38 sentences (4.4K)
+  - news-crawl_news.2017 #          ~1,194,690 sentences (135M)
+  - news-crawl_news.2018 #          ~1,964,601 sentences (222M)
+  - news-crawl_news.2019 #          ~3,168,141 sentences (358M)
+  - news-crawl_news.2020 #          ~3,716,814 sentences (420M)
+  - news-crawl_news.2021 #          ~3,814,159 sentences (431M)
+  - news-crawl_news.2022 #          ~3,575,221 sentences (404M)
+  - news-crawl_news.2023 #          ~2,796,460 sentences (316M)
+marian-args:  # Arguments grouped by decoding/training stage; numeric values are quoted, so they load as strings.
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # lower than the '20' used by the teacher and student stages below
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # unquoted, so this is a real YAML boolean (contrast with the quoted use-opuscleaner: 'true')
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot  # only a default worker class is set; no per-task entries
diff --git a/configs/en-uk-spring-2024.yml b/configs/en-uk-spring-2024.yml
new file mode 100644
index 000000000..32b4eb83a
--- /dev/null
+++ b/configs/en-uk-spring-2024.yml
@@ -0,0 +1,134 @@
+# The initial configuration was generated using:
+# task config-generator -- en uk --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # Experiment settings for the en -> uk pair (see src/trg below).
+  name: spring-2024  # same value as the --name argument in the generator command above
+  src: en
+  trg: uk
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted, so this loads as the string 'true', not a YAML boolean
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping: no per-dataset thresholds are set here
+  mono-max-sentences-src: 500_000_000  # 500 million; '_' separators are read as an int by YAML 1.1 loaders (e.g. PyYAML)
+  mono-max-sentences-trg: 200_000_000  # 200 million (same '_' caveat)
+  spm-sample-size: 10_000_000  # 10 million (same '_' caveat)
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-ukr
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   58,968,083 sentences
+  # 
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (20,240,171 sentences)
+  #  - opus_MultiMaCoCu/v2 - ignored datasets (6,406,288 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-eng-ukr - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-ukr - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-ukr - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-ukr_UA - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       20,240,171 sentences
+  - opus_ParaCrawl/v9 #                                  14,079,832 sentences
+  - opus_CCAligned/v1 #                                   8,547,377 sentences
+  - opus_MaCoCu/v2 #                                      6,406,294 sentences
+  - opus_XLEnt/v1.2 #                                     3,671,061 sentences
+  - opus_SUMMA/v1 #                                       1,574,611 sentences
+  - opus_OpenSubtitles/v2018 #                              877,780 sentences
+  - opus_wikimedia/v20230407 #                              757,910 sentences
+  - opus_WikiMatrix/v1 #                                    681,115 sentences
+  - opus_ELRC-5214-A_Lexicon_Named/v1 #                     495,403 sentences
+  - opus_ELRC-5183-SciPar_Ukraine/v1 #                      306,813 sentences
+  - opus_KDE4/v2 #                                          233,611 sentences
+  - opus_QED/v2.0a #                                        215,630 sentences
+  - opus_TED2020/v1 #                                       208,141 sentences
+  - opus_Tatoeba/v2023-04-12 #                              175,502 sentences
+  - opus_ELRC-5179-acts_Ukrainian/v1 #                      129,942 sentences
+  - opus_ELRC-5180-Official_Parliament_/v1 #                116,260 sentences
+  - opus_NeuLab-TedTalks/v1 #                               115,474 sentences
+  - opus_ELRC-5181-Official_Parliament_/v1 #                 61,012 sentences
+  - opus_ELRC-5174-French_Polish_Ukrain/v1 #                 36,228 sentences
+  - opus_bible-uedin/v1 #                                    15,901 sentences
+  - opus_ELRC-5182-Official_Parliament_/v1 #                  8,800 sentences
+  - opus_ELRC-3043-wikipedia_health/v1 #                      2,735 sentences
+  - opus_ELRC-wikipedia_health/v1 #                           2,735 sentences
+  - opus_ELRC_2922/v1 #                                       2,734 sentences
+  - opus_EUbookshop/v2 #                                      1,793 sentences
+  - opus_TildeMODEL/v2018 #                                   1,628 sentences
+  - opus_ELRC-5217-Ukrainian_Legal_MT/v1 #                      997 sentences
+  - opus_tldr-pages/v2023-08-29 #                               593 sentences
+  - mtdata_Neulab-tedtalks_test-1-eng-ukr #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Tilde-worldbank-1-eng-ukr #                      ~2,011 sentences (227.3 kB)
+
+  # The source-language (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-language (uk) monolingual data contains:
+  #   ~463,898 sentences
+  mono-trg:
+  - news-crawl_news.2008  #              ~6,070 sentences (686K)
+  - news-crawl_news.2009 #             ~30,088 sentences (3.4M)
+  - news-crawl_news.2010 #              ~6,504 sentences (735K)
+  - news-crawl_news.2011 #             ~58,407 sentences (6.6M)
+  - news-crawl_news.2012 #             ~68,141 sentences (7.7M)
+  - news-crawl_news.2013 #             ~82,300 sentences (9.3M)
+  - news-crawl_news.2014 #             ~87,610 sentences (9.9M)
+  - news-crawl_news.2016 #             ~39,823 sentences (4.5M)
+  - news-crawl_news.2018 #             ~84,955 sentences (9.6M)
+marian-args:  # Arguments grouped by decoding/training stage; numeric values are quoted, so they load as strings.
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # lower than the '20' used by the teacher and student stages below
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # unquoted, so this is a real YAML boolean (contrast with the quoted use-opuscleaner: 'true')
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot  # only a default worker class is set; no per-task entries
diff --git a/configs/en-vi-spring-2024.yml b/configs/en-vi-spring-2024.yml
new file mode 100644
index 000000000..1d9d9de21
--- /dev/null
+++ b/configs/en-vi-spring-2024.yml
@@ -0,0 +1,111 @@
+# The initial configuration was generated using:
+# task config-generator -- en vi --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # Experiment settings for the en -> vi pair (see src/trg below).
+  name: spring-2024  # same value as the --name argument in the generator command above
+  src: en
+  trg: vi
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted, so this loads as the string 'true', not a YAML boolean
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping: no per-dataset thresholds are set here
+  mono-max-sentences-src: 500_000_000  # 500 million; '_' separators are read as an int by YAML 1.1 loaders (e.g. PyYAML)
+  mono-max-sentences-trg: 200_000_000  # 200 million (same '_' caveat)
+  spm-sample-size: 10_000_000  # 10 million (same '_' caveat)
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-vie
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   69,085,316 sentences
+  # 
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (50,092,444 sentences)
+  #  - opus_GNOME/v1 - not enough data  (149 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - opus_XLEnt/v1.2 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-eng-vie - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-vie - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-vie - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-vie_VN - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       50,092,444 sentences
+  - opus_CCAligned/v1 #                                  12,394,417 sentences
+  - opus_OpenSubtitles/v2018 #                            3,505,276 sentences
+  - opus_WikiMatrix/v1 #                                  1,073,752 sentences
+  - opus_wikimedia/v20230407 #                              669,743 sentences
+  - opus_QED/v2.0a #                                        338,024 sentences
+  - opus_TED2020/v1 #                                       326,417 sentences
+  - opus_NeuLab-TedTalks/v1 #                               184,973 sentences
+  - opus_StanfordNLP-NMT/v1.0 #                             133,167 sentences
+  - opus_ELRC-wikipedia_health/v1 #                         126,413 sentences
+  - opus_bible-uedin/v1 #                                   124,390 sentences
+  - opus_Wikipedia/v1.0 #                                    58,116 sentences
+  - opus_KDE4/v2 #                                           42,782 sentences
+  - opus_Tatoeba/v2023-04-12 #                                6,855 sentences
+  - opus_ELRC-3086-wikipedia_health/v1 #                      4,274 sentences
+  - opus_ELRC_2922/v1 #                                       4,273 sentences
+  - mtdata_Neulab-tedtalks_test-1-eng-vie #             ~3,117,009 sentences (352.2 MB)
+
+  # The source-language (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-src:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+
+  # The target-language (vi) monolingual data contains:
+  #   ~0 sentences
+  mono-trg: []
+marian-args:  # Arguments grouped by decoding/training stage; numeric values are quoted, so they load as strings.
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # lower than the '20' used by the teacher and student stages below
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true  # unquoted, so this is a real YAML boolean (contrast with the quoted use-opuscleaner: 'true')
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot  # only a default worker class is set; no per-task entries
diff --git a/configs/hr-en-spring-2024.yml b/configs/hr-en-spring-2024.yml
new file mode 100644
index 000000000..f97abd59a
--- /dev/null
+++ b/configs/hr-en-spring-2024.yml
@@ -0,0 +1,225 @@
+# The initial configuration was generated using:
+# task config-generator -- hr en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # settings for the hr -> en "spring-2024" run
+  name: spring-2024
+  src: hr
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted: loads as the string 'true', not a YAML boolean
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping; presumably per-dataset overrides of default-threshold — confirm
+  mono-max-sentences-src: 500_000_000  # underscores: integer under YAML 1.1 loaders (e.g. PyYAML), string under the YAML 1.2 core schema
+  mono-max-sentences-trg: 200_000_000  # above the ~195.8M sentences listed under datasets.mono-trg
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:  # dataset names by role: devtest, test, train, mono-src, mono-trg
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-hrv
+  - flores_aug-mix_dev
+  test:  # the plain flores devtest set plus six flores_aug-* variants of it
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   99,724,833 sentences (sum of the opus_* counts below; mtdata_* entries are not counted)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (18,797,643 sentences)
+  #  - opus_MultiHPLT/v1.1 - ignored datasets (9,310,276 sentences)
+  #  - opus_MultiMaCoCu/v2 - ignored datasets (2,266,005 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data (0 sentences)
+  #  - mtdata_ELRC-croatian_bank-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-croatian_mine_action-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-agriculture-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-hrv - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-eng-hrv - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-hrv - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-hrv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-hrv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-hrv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-hrv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-hrv - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-hrv_HR - duplicate with opus
+  train:  # NOTE(review): several opus_ELRC-*/ELRA-* entries have (near-)identical counts — possibly one corpus under several names
+  - opus_OpenSubtitles/v2018  #                           35,131,729 sentences
+  - opus_NLLB/v1 #                                       18,797,643 sentences
+  - opus_ELRC-EMEA/v1 #                                  10,890,456 sentences
+  - opus_CCAligned/v1 #                                   9,376,190 sentences
+  - opus_HPLT/v1.1 #                                      9,310,369 sentences
+  - opus_ParaCrawl/v9 #                                   3,240,485 sentences
+  - opus_XLEnt/v1.2 #                                     2,844,710 sentences
+  - opus_ELRC-4142-NTEU_TierA/v1 #                        2,290,893 sentences
+  - opus_MaCoCu/v2 #                                      2,266,007 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              806,581 sentences
+  - opus_TildeMODEL/v2018 #                                 745,616 sentences
+  - opus_DGT/v2019 #                                        722,182 sentences
+  - opus_ELRC-2706-EMEA/v1 #                                650,030 sentences
+  - opus_WikiMatrix/v1 #                                    259,499 sentences
+  - opus_QED/v2.0a #                                        208,129 sentences
+  - opus_SETIMES/v2 #                                       205,910 sentences
+  - opus_TED2020/v1 #                                       197,411 sentences
+  - opus_ELITR-ECA/v1 #                                     181,038 sentences
+  - opus_EuroPat/v3 #                                       154,775 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        140,795 sentences
+  - opus_ELRC-Regional_Development/v1 #                     136,809 sentences
+  - opus_NeuLab-TedTalks/v1 #                               128,233 sentences
+  - opus_ELRC-Rural_Development/v1 #                        105,562 sentences
+  - opus_hrenWaC/v1 #                                        99,001 sentences
+  - opus_KDE4/v2 #                                           87,333 sentences
+  - opus_TedTalks/v1 #                                       86,348 sentences
+  - opus_ELRC-2542-Agriculture/v1 #                          68,376 sentences
+  - opus_bible-uedin/v1 #                                    62,179 sentences
+  - opus_ELRC-4329-PRINCIPLE_MVEP_legal/v1 #                 44,460 sentences
+  - opus_wikimedia/v20230407 #                               42,034 sentences
+  - opus_GNOME/v1 #                                          35,429 sentences
+  - opus_ELRC-3556-EUR_LEX_covid/v1 #                        22,010 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,010 sentences
+  - opus_ELRC-651-government_websites_/v1 #                  21,341 sentences
+  - opus_ELRC-government_websites/v1 #                       21,341 sentences
+  - opus_ELRA-W0204/v1 #                                     21,340 sentences
+  - opus_ELRC-943-Journal_Croatian_Ass/v1 #                  18,478 sentences
+  - opus_ELRA-W0273/v1 #                                     18,477 sentences
+  - opus_ELRC-1015-Croatian_Mine_Action/v1 #                 17,602 sentences
+  - opus_ELRA-W0131/v1 #                                     17,601 sentences
+  - opus_ELRC-1174-EUIPO_2017/v1 #                           17,205 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                17,205 sentences
+  - opus_ELRC-2866-EU_publications_medi/v1 #                 12,837 sentences
+  - opus_ELRC-EU_publications/v1 #                           12,837 sentences
+  - opus_ELRC-921-studies_challenges_C/v1 #                  11,781 sentences
+  - opus_ELRA-W0266/v1 #                                     11,780 sentences
+  - opus_ELRC-915-statistical_reports_/v1 #                  11,738 sentences
+  - opus_ELRC-statistical_reports/v1 #                       11,738 sentences
+  - opus_ELRA-W0264/v1 #                                     11,737 sentences
+  - opus_ELRC-788-Croatian_Bank/v1 #                         11,708 sentences
+  - opus_ELRA-W0226/v1 #                                     11,707 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            10,175 sentences
+  - opus_ELRC-2541-Regional_Development/v1 #                  7,911 sentences
+  - opus_ELRC-3597-presscorner_covid/v1 #                     6,645 sentences
+  - opus_EUbookshop/v2 #                                      6,104 sentences
+  - opus_ELRC-992-Rural_Development_Pr/v1 #                   5,202 sentences
+  - opus_ELRC_3382/v1 #                                       3,671 sentences
+  - opus_ELRC-989-Foreign_Affairs_Croa/v1 #                   3,103 sentences
+  - opus_ELRC-Foreign_Affairs/v1 #                            3,103 sentences
+  - opus_ELRA-W0293/v1 #                                      3,102 sentences
+  - opus_ELRC-3478-EC_EUROPA_covid/v1 #                       2,595 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,595 sentences
+  - opus_Tatoeba/v2023-04-12 #                                2,454 sentences
+  - opus_ELRC-991-Croatian_Journal_Fis/v1 #                   2,408 sentences
+  - opus_ELRA-W0294/v1 #                                      2,407 sentences
+  - opus_ELRC-1080-Acts_Biological_Land/v1 #                  2,329 sentences
+  - opus_ELRA-W0142/v1 #                                      2,328 sentences
+  - opus_ELRC-1058-University_Library_Z/v1 #                  2,310 sentences
+  - opus_ELRA-W0135/v1 #                                      2,309 sentences
+  - opus_ELRC-986-Embassy_Finland_Zagr/v1 #                   1,967 sentences
+  - opus_ELRA-W0292/v1 #                                      1,966 sentences
+  - opus_ELRC-1159-Swedish_Migration_Bo/v1 #                  1,112 sentences
+  - opus_ELRC-Swedish_Migration/v1 #                          1,112 sentences
+  - opus_ELRC-3193-antibiotic/v1 #                            1,070 sentences
+  - opus_ELRC-antibiotic/v1 #                                 1,070 sentences
+  - opus_ELRC-984-Government_Cooperati/v1 #                   1,026 sentences
+  - opus_ELRA-W0291/v1 #                                      1,025 sentences
+  - opus_ELRC-996-nature_protection_st/v1 #                     970 sentences
+  - opus_ELRC-825-Croatian_Swedish_Cri/v1 #                     907 sentences
+  - opus_ELRA-W0238/v1 #                                        906 sentences
+  - opus_ELRC-2753-vaccination/v1 #                             509 sentences
+  - opus_ELRC-vaccination/v1 #                                  509 sentences
+  - opus_ELRC_2922/v1 #                                         485 sentences
+  - opus_ELRC-3284-EUROPARL_covid/v1 #                          475 sentences
+  - opus_ELRC_2923/v1 #                                         288 sentences
+  - mtdata_ELRC-government_websites_croatian-1-eng-hrv
+  - mtdata_ELRC-croatian_swedish_crime_victim_compensation_support_authority-1-eng-hrv
+  - mtdata_ELRC-statistical_reports_studies_croatian_bureau_statistics-1-eng-hrv
+  - mtdata_ELRC-studies_challenges_croatian_accession_union_croatian_institute_finance-1-eng-hrv
+  - mtdata_ELRC-journal_croatian_association_civil_engineers-1-eng-hrv
+  - mtdata_ELRC-government_cooperation_ngos-1-eng-hrv
+  - mtdata_ELRC-embassy_finland_zagreb-1-eng-hrv
+  - mtdata_ELRC-foreign_affairs_croatia-1-eng-hrv
+  - mtdata_ELRC-croatian_journal_fisheries-1-eng-hrv
+  - mtdata_ELRC-rural_development_programme_period_2014_2020_croatian_rural_development_programme-1-eng-hrv
+  - mtdata_ELRC-nature_protection_strategy_croatia-1-eng-hrv
+  - mtdata_ELRC-university_library_zagreb-1-eng-hrv
+  - mtdata_ELRC-acts_biological_landscape_diversity_environmental_protection-1-eng-hrv
+  - mtdata_ELRC-swedish_migration_board_migrationsverket-1-eng-hrv
+  - mtdata_ELRC-regional_development_funds-1-eng-hrv
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-hrv
+  - mtdata_ELRC-wikipedia_health-1-eng-hrv
+  - mtdata_ELRC-nteu_tierb-1-eng-hrv
+  - mtdata_EU-eac_reference-1-eng-hrv #                    ~31,162 sentences (3.5 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-hrv #             ~3,117,009 sentences (352.2 MB) NOTE(review): "test" split of the corpus whose dev split is in devtest — confirm it belongs in train
+  - mtdata_Tilde-eesc-2017-eng-hrv #                      ~216,663 sentences (24.5 MB)
+  - mtdata_Tilde-ema-2016-eng-hrv #                       ~209,283 sentences (23.6 MB)
+  - mtdata_Tilde-ecb-2017-eng-hrv #                           ~876 sentences (99.0 kB)
+  - mtdata_Tilde-rapid-2016-eng-hrv #                      ~45,055 sentences (5.1 MB)
+  - mtdata_Tilde-worldbank-1-eng-hrv #                      ~1,566 sentences (177.0 kB)
+
+  # The source-side (hr) monolingual data contains:
+  #   ~11,498,228 sentences (sum of the per-dataset estimates below)
+  mono-src:
+  - news-crawl_news.2014  #             ~46,902 sentences (5.3M)
+  - news-crawl_news.2019 #          ~1,398,230 sentences (158M)
+  - news-crawl_news.2020 #          ~2,610,619 sentences (295M)
+  - news-crawl_news.2021 #          ~2,398,230 sentences (271M)
+  - news-crawl_news.2022 #          ~2,592,920 sentences (293M)
+  - news-crawl_news.2023 #          ~2,451,327 sentences (277M)
+
+  # The target-side (en) monolingual data contains:
+  #   ~195,823,002 sentences (sum of the estimates below; equal download sizes give equal estimates)
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:  # option groups keyed by step (decoding-*/training-*); numeric values are quoted, so they load as strings
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'  # twice the decoding-backward value
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # lower than the '20' used by the teacher and student groups below
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all  # NOTE(review): presumably "run every pipeline stage" — confirm against the config docs
+wandb-publication: true  # unquoted, so this loads as a YAML boolean
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot  # NOTE(review): name suggests spot (preemptible) GCP workers — confirm
diff --git a/configs/id-en-spring-2024.yml b/configs/id-en-spring-2024.yml
new file mode 100644
index 000000000..315d3d663
--- /dev/null
+++ b/configs/id-en-spring-2024.yml
@@ -0,0 +1,117 @@
+# The initial configuration was generated using:
+# task config-generator -- id en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # settings for the id -> en "spring-2024" run
+  name: spring-2024
+  src: id
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted: loads as the string 'true', not a YAML boolean
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping; presumably per-dataset overrides of default-threshold — confirm
+  mono-max-sentences-src: 500_000_000  # underscores: integer under YAML 1.1 loaders (e.g. PyYAML), string under the YAML 1.2 core schema
+  mono-max-sentences-trg: 200_000_000  # above the ~195.8M sentences listed under datasets.mono-trg
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:  # dataset names by role: devtest, test, train, mono-src, mono-trg
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-ind
+  - flores_aug-mix_dev
+  test:  # the plain flores devtest set plus six flores_aug-* variants of it
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   102,103,778 sentences (sum of the opus_* counts below; the mtdata_* estimate is not counted)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (70,545,705 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-eng-ind - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-ind - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-ind - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-14-eng-ind - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-15-eng-ind - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-16-eng-ind - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-ind_ID - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       70,545,705 sentences
+  - opus_CCAligned/v1 #                                  15,700,345 sentences
+  - opus_OpenSubtitles/v2018 #                            9,268,181 sentences
+  - opus_XLEnt/v1.2 #                                     4,179,174 sentences
+  - opus_WikiMatrix/v1 #                                  1,019,171 sentences
+  - opus_Tanzil/v1 #                                        393,552 sentences
+  - opus_wikimedia/v20230407 #                              284,126 sentences
+  - opus_QED/v2.0a #                                        274,581 sentences
+  - opus_TED2020/v1 #                                       165,059 sentences
+  - opus_NeuLab-TedTalks/v1 #                                95,295 sentences
+  - opus_bible-uedin/v1 #                                    59,363 sentences
+  - opus_GNOME/v1 #                                          47,234 sentences
+  - opus_News-Commentary/v16 #                               18,054 sentences
+  - opus_GlobalVoices/v2018q4 #                              16,043 sentences
+  - opus_KDE4/v2 #                                           14,782 sentences
+  - opus_Tatoeba/v2023-04-12 #                               10,550 sentences
+  - opus_tico-19/v2020-10-28 #                                3,071 sentences
+  - opus_ELRC-3049-wikipedia_health/v1 #                      2,680 sentences
+  - opus_ELRC-wikipedia_health/v1 #                           2,680 sentences (same count as the entry above — possibly the same corpus; review)
+  - opus_ELRC_2922/v1 #                                       2,679 sentences
+  - opus_tldr-pages/v2023-08-29 #                             1,453 sentences
+  - mtdata_Neulab-tedtalks_test-1-eng-ind #             ~3,117,009 sentences (352.2 MB) NOTE(review): "test" split of the corpus whose dev split is in devtest — confirm it belongs in train
+
+  # The source-side (id) monolingual data contains:
+  #   ~0 sentences (no monolingual datasets are listed for the source language)
+  mono-src: []
+
+  # The target-side (en) monolingual data contains:
+  #   ~195,823,002 sentences (sum of the estimates below; equal download sizes give equal estimates)
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:  # option groups keyed by step (decoding-*/training-*); numeric values are quoted, so they load as strings
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'  # twice the decoding-backward value
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # lower than the '20' used by the teacher and student groups below
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all  # NOTE(review): presumably "run every pipeline stage" — confirm against the config docs
+wandb-publication: true  # unquoted, so this loads as a YAML boolean
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot  # NOTE(review): name suggests spot (preemptible) GCP workers — confirm
diff --git a/configs/lt-en-spring-2024.yml b/configs/lt-en-spring-2024.yml
new file mode 100644
index 000000000..be55f5214
--- /dev/null
+++ b/configs/lt-en-spring-2024.yml
@@ -0,0 +1,192 @@
+# The initial configuration was generated using:
+# task config-generator -- lt en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:  # settings for the lt -> en "spring-2024" run
+  name: spring-2024
+  src: lt
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'  # quoted: loads as the string 'true', not a YAML boolean
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}  # empty mapping; presumably per-dataset overrides of default-threshold — confirm
+  mono-max-sentences-src: 500_000_000  # underscores: integer under YAML 1.1 loaders (e.g. PyYAML), string under the YAML 1.2 core schema
+  mono-max-sentences-trg: 200_000_000  # above the ~195.8M sentences listed under datasets.mono-trg
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}  # empty mapping: no pretrained models are configured
+datasets:  # dataset names by role: devtest, test, train, mono-src, mono-trg
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-lit
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt19/dev  # WMT19 dev set: WMT19 news dev/test data must stay out of `train`
+  test:  # the plain flores devtest set, six flores_aug-* variants of it, and WMT19
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt19
+
+  # The training data contains:
+  #   76,637,902 sentences (sum of the active opus_* counts below; mtdata_* entries are not counted)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (23,298,470 sentences)
+  #  - opus_ELRC-3069-wikipedia_health/v1 - not enough data (136 sentences)
+  #  - opus_ELRC-wikipedia_health/v1 - not enough data (136 sentences)
+  #  - opus_ELRC_2922/v1 - not enough data (135 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data (0 sentences)
+  #  - mtdata_ELRC-president_lithuania-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-www.lrs.lt-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-www.lb.lt-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-kam.lt-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-lit - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-eng-lit - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-lit - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-lit - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-lit - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-3-eng-lit - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-lit - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-lit - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-lit - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-lit - duplicate with opus
+  #  - mtdata_Statmt-europarl-9-lit-eng - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-lit-eng - duplicate with opus
+  #  - mtdata_Statmt-europarl-10-lit-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-lit_LT - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-eng-lit - duplicate with opus
+  train:  # entries commented out in place below would leak the WMT19 dev/test sets used by devtest/test
+  - opus_NLLB/v1  #                                       23,298,470 sentences
+  - opus_ParaCrawl/v9 #                                  13,192,237 sentences
+  - opus_ELRC-EMEA/v1 #                                  11,487,359 sentences
+  - opus_ELRC-4270-NTEU_TierA/v1 #                        8,061,918 sentences
+  - opus_CCAligned/v1 #                                   5,215,271 sentences
+  - opus_DGT/v2019 #                                      5,061,918 sentences
+  - opus_TildeMODEL/v2018 #                               2,084,002 sentences
+  - opus_XLEnt/v1.2 #                                     1,642,943 sentences
+  - opus_OpenSubtitles/v2018 #                            1,415,961 sentences
+  - opus_EMEA/v3 #                                        1,042,425 sentences
+  - opus_JRC-Acquis/v3.0 #                                  790,475 sentences
+  - opus_ELRC-2717-EMEA/v1 #                                764,031 sentences
+  - opus_Europarl/v8 #                                      634,284 sentences
+  - opus_EUbookshop/v2 #                                    445,813 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              177,437 sentences
+  - opus_WikiMatrix/v1 #                                    157,526 sentences
+  - opus_ELITR-ECA/v1 #                                     147,678 sentences
+  - opus_ELRC-425-Lithuanian_legislati/v1 #                 130,549 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        117,054 sentences
+  - opus_KDE4/v2 #                                          104,044 sentences
+  - opus_QED/v2.0a #                                         85,435 sentences
+  - opus_TED2020/v1 #                                        75,484 sentences
+  - opus_ECB/v1 #                                            69,805 sentences
+  - opus_bible-uedin/v1 #                                    62,187 sentences
+  - opus_GNOME/v1 #                                          59,776 sentences
+  - opus_NeuLab-TedTalks/v1 #                                45,963 sentences
+  - opus_ELRC-591-www.lb.lt/v1 #                             33,261 sentences
+  - opus_ELRC-3568-EUR_LEX_covid/v1 #                        21,390 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   21,390 sentences
+  - opus_ELRC-405-President_Lithuania/v1 #                   21,225 sentences
+  - opus_ELRA-W0160/v1 #                                     21,224 sentences
+  - opus_ELRC-2021-EUIPO_2017/v1 #                           17,133 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                17,133 sentences
+  - opus_wikimedia/v20230407 #                               14,454 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            13,851 sentences
+  - opus_ELRC-antibiotic/v1 #                                12,602 sentences
+  - opus_ELRC-2878-EU_publications_medi/v1 #                 12,581 sentences
+  - opus_ELRC-EU_publications/v1 #                           12,581 sentences
+  - opus_EUconst/v1 #                                        10,171 sentences
+  - opus_ELRC-592-kam.lt/v1 #                                 8,531 sentences
+  - opus_Tatoeba/v2023-04-12 #                                8,236 sentences
+  - opus_ELRC-3609-presscorner_covid/v1 #                     6,462 sentences
+  # - opus_WMT-News/v2019 - excluded: built from the WMT news dev/test sets, overlaps sacrebleu_wmt19 and wmt19/dev (5,998 sentences)
+  - opus_ELRC_3382/v1 #                                       3,587 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,546 sentences
+  - opus_ELRC-3467-EC_EUROPA_covid/v1 #                       2,438 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,438 sentences
+  - opus_ELRC-590-www.lrs.lt/v1 #                             1,771 sentences
+  - opus_ELRC-3205-antibiotic/v1 #                              823 sentences
+  - opus_ELRC-3296-EUROPARL_covid/v1 #                          553 sentences
+  - opus_ELRC-2740-vaccination/v1 #                             546 sentences
+  - opus_ELRC-vaccination/v1 #                                  546 sentences
+  - opus_ELRC_2923/v1 #                                         384 sentences
+  - mtdata_ELRC-lithuanian_legislation_seimas_lithuania-1-eng-lit
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-lit
+  - mtdata_ELRC-wikipedia_health-1-eng-lit
+  - mtdata_ELRC-nteu_tierb-1-eng-lit
+  - mtdata_EU-eac_forms-1-eng-lit #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-lit #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-lit #                            ~510,025 sentences (57.6 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-lit #             ~3,117,009 sentences (352.2 MB) NOTE(review): "test" split of the corpus whose dev split is in devtest — confirm it belongs in train
+  - mtdata_Statmt-wiki_titles-1-lit-eng #                  ~15,267 sentences (1.7 MB)
+  # - mtdata_Statmt-newsdev_enlt-2019-eng-lit - excluded: WMT19 dev set, same data as devtest sacrebleu_aug-mix_wmt19/dev
+  # - mtdata_Statmt-newsdev_lten-2019-lit-eng - excluded: WMT19 dev set, same data as devtest sacrebleu_aug-mix_wmt19/dev
+  - mtdata_Tilde-eesc-2017-eng-lit #                    ~1,149,015 sentences (129.8 MB)
+  - mtdata_Tilde-ema-2016-eng-lit #                       ~228,287 sentences (25.8 MB)
+  - mtdata_Tilde-airbaltic-1-eng-lit #                        ~962 sentences (108.7 kB)
+  - mtdata_Tilde-rapid-2016-eng-lit #                     ~180,798 sentences (20.4 MB)
+
+  # The source-side (lt) monolingual data contains:
+  #   ~5,442,476 sentences (sum of the estimates below; equal download sizes give equal estimates)
+  mono-src:
+  - news-crawl_news.2019  #          ~1,079,646 sentences (122M)
+  - news-crawl_news.2020 #          ~1,088,495 sentences (123M)
+  - news-crawl_news.2021 #          ~1,008,849 sentences (114M)
+  - news-crawl_news.2022 #          ~1,079,646 sentences (122M)
+  - news-crawl_news.2023 #          ~1,185,840 sentences (134M)
+
+  # The target-side (en) monolingual data contains:
+  #   ~195,823,002 sentences (sum of the estimates below; equal download sizes give equal estimates)
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:  # option groups keyed by step (decoding-*/training-*); numeric values are quoted, so they load as strings
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'  # twice the decoding-backward value
+    precision: float16
+  training-backward:
+    early-stopping: '5'  # lower than the '20' used by the teacher and student groups below
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all  # NOTE(review): presumably "run every pipeline stage" — confirm against the config docs
+wandb-publication: true  # unquoted, so this loads as a YAML boolean
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot  # NOTE(review): name suggests spot (preemptible) GCP workers — confirm
diff --git a/configs/lv-en-spring-2024.yml b/configs/lv-en-spring-2024.yml
new file mode 100644
index 000000000..d37e982df
--- /dev/null
+++ b/configs/lv-en-spring-2024.yml
@@ -0,0 +1,194 @@
+# The initial configuration was generated using:
+# task config-generator -- lv en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: lv
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt17/dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt17
+
+  # The training data contains:
+  #   68,374,368 sentences
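+  # NOTE(review): train lists mtdata_Statmt-newsdev_{lven,enlv}-2017, presumably the same WMT17 dev set as devtest's sacrebleu_aug-mix_wmt17/dev - confirm there is no train/dev overlap.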
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored dataset (16,685,969 sentences)
+  #  - opus_ELRC-3089-wikipedia_health/v1 - not enough data (143 sentences)
+  #  - opus_ELRC-wikipedia_health/v1 - not enough data (143 sentences)
+  #  - opus_ELRC_2922/v1 - not enough data (142 sentences)
+  #  - opus_ELRA-W0308/v1 - not enough data (108 sentences)
+  #  - opus_ELRC-648-Letter_rights_person/v1 - not enough data (84 sentences)
+  #  - opus_ELRC-403-Rights_Arrested/v1 - not enough data (23 sentences)
+  #  - opus_ELRA-W0301/v1 - not enough data (20 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data (0 sentences)
+  #  - mtdata_ELRC-mfa_latvia-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-state_latvian-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-www.visitestonia.com-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-covid19.gov.lv-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-lav - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-eng-lav - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-lav - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-lav - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-lav - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-lav - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-lav - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-lav-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-lav_LV - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-eng-lav - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       16,685,969 sentences
+  - opus_ParaCrawl/v9 #                                  13,064,066 sentences
+  - opus_ELRC-EMEA/v1 #                                  11,795,507 sentences
+  - opus_ELRC-4269-NTEU_TierA/v1 #                        8,072,484 sentences
+  - opus_DGT/v2019 #                                      5,072,124 sentences
+  - opus_CCAligned/v1 #                                   4,850,972 sentences
+  - opus_TildeMODEL/v2018 #                               2,111,785 sentences
+  - opus_XLEnt/v1.2 #                                     1,295,887 sentences
+  - opus_EMEA/v3 #                                        1,030,272 sentences
+  - opus_JRC-Acquis/v3.0 #                                  793,589 sentences
+  - opus_ELRC-2729-EMEA/v1 #                                783,490 sentences
+  - opus_Europarl/v8 #                                      639,318 sentences
+  - opus_OpenSubtitles/v2018 #                              519,553 sentences
+  - opus_EUbookshop/v2 #                                    445,891 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              347,473 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        128,895 sentences
+  - opus_KDE4/v2 #                                           91,386 sentences
+  - opus_QED/v2.0a #                                         72,447 sentences
+  - opus_ECB/v1 #                                            65,374 sentences
+  - opus_ELITR-ECA/v1 #                                      64,115 sentences
+  - opus_TED2020/v1 #                                        55,488 sentences
+  - opus_ELRC-399-International_Agreem/v1 #                  40,897 sentences
+  - opus_ELRA-W0158/v1 #                                     40,896 sentences
+  - opus_ELRC-3578-EUR_LEX_covid/v1 #                        22,476 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,476 sentences
+  - opus_wikimedia/v20230407 #                               21,295 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            17,831 sentences
+  - opus_ELRC-2022-EUIPO_2017/v1 #                           17,255 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                17,255 sentences
+  - opus_bible-uedin/v1 #                                    15,885 sentences
+  - opus_ELRC-1130-www.visitestonia.com/v1 #                 13,841 sentences
+  - opus_ELRC-www.visitestonia.com/v1 #                      13,841 sentences
+  - opus_ELRC-2888-EU_publications_medi/v1 #                 13,045 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,045 sentences
+  - opus_ELRC-antibiotic/v1 #                                12,048 sentences
+  - opus_ELRC-715-Finance_Economics_Ba/v1 #                  11,600 sentences
+  - opus_ELRA-W0216/v1 #                                     11,599 sentences
+  - opus_GNOME/v1 #                                          11,265 sentences
+  - opus_EUconst/v1 #                                        10,036 sentences
+  - opus_WMT-News/v2019 #                                     8,008 sentences
+  - opus_ELRC-402-MFA_Latvia/v1 #                             7,195 sentences
+  - opus_ELRA-W0159/v1 #                                      7,194 sentences
+  - opus_ELRC-433-State_Latvian/v1 #                          6,862 sentences
+  - opus_ELRA-W0169/v1 #                                      6,861 sentences
+  - opus_ELRC-3619-presscorner_covid/v1 #                     6,686 sentences
+  - opus_ELRC_3382/v1 #                                       3,737 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,543 sentences
+  - opus_ELRC-3477-EC_EUROPA_covid/v1 #                       2,407 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,407 sentences
+  - opus_ELRC-4994-Latvian_Financial_MT/v1 #                  2,002 sentences
+  - opus_Tatoeba/v2023-04-12 #                                1,814 sentences
+  - opus_ELRC-3453-covid19.gov.lv/v1 #                          826 sentences
+  - opus_ELRC-3217-antibiotic/v1 #                              809 sentences
+  - opus_ELRC-3306-EUROPARL_covid/v1 #                          724 sentences
+  - opus_ELRC_2923/v1 #                                         580 sentences
+  - opus_ELRC-2741-vaccination/v1 #                             521 sentences
+  - opus_ELRC-vaccination/v1 #                                  521 sentences
+  - mtdata_ELRC-international_agreements-1-eng-lav
+  - mtdata_ELRC-rights_arrested-1-eng-lav
+  - mtdata_ELRC-letter_rights_persons_arrested_or_detained-1-eng-lav
+  - mtdata_ELRC-finance_economics_bank_latvia-1-eng-lav
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-lav
+  - mtdata_ELRC-wikipedia_health-1-eng-lav
+  - mtdata_ELRC-nteu_tierb-1-eng-lav
+  - mtdata_EU-eac_forms-1-eng-lav #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-lav #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-lav #                            ~524,054 sentences (59.2 MB)
+  - mtdata_Statmt-newsdev_lven-2017-lav-eng #             ~402,756 sentences (45.5 MB)
+  - mtdata_Statmt-newsdev_enlv-2017-eng-lav #             ~402,756 sentences (45.5 MB)
+  - mtdata_Tilde-eesc-2017-eng-lav #                    ~1,122,956 sentences (126.9 MB)
+  - mtdata_Tilde-ema-2016-eng-lav #                       ~231,439 sentences (26.2 MB)
+  - mtdata_Tilde-airbaltic-1-eng-lav #                      ~1,050 sentences (118.7 kB)
+  - mtdata_Tilde-fold-1-eng-lav #                          ~10,070 sentences (1.1 MB)
+  - mtdata_Tilde-rapid-2016-eng-lav #                     ~198,906 sentences (22.5 MB)
+
+  # The source (lv) monolingual data contains:
+  #   ~3,283,185 sentences
+  mono-src:
+  - news-crawl_news.2015  #          ~1,274,336 sentences (144M)
+  - news-crawl_news.2016 #          ~1,017,699 sentences (115M)
+  - news-crawl_news.2017 #            ~991,150 sentences (112M)
+
+  # The target (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/ro-en-spring-2024.yml b/configs/ro-en-spring-2024.yml
new file mode 100644
index 000000000..669a8501c
--- /dev/null
+++ b/configs/ro-en-spring-2024.yml
@@ -0,0 +1,219 @@
+# The initial configuration was generated using:
+# task config-generator -- ro en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: ro
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-ron
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt16/dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt16
+
+  # The training data contains:
+  #   174,698,415 sentences
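+  # NOTE(review): train lists mtdata_Statmt-newsdev_{enro,roen}-2016 (presumably the WMT16 dev set used in devtest) and mtdata_Neulab-tedtalks_test - confirm no eval data leaks into training.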
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored dataset (55,607,023 sentences)
+  #  - opus_ELRA-W0308/v1 - not enough data (92 sentences)
+  #  - opus_ELRC-648-Letter_rights_person/v1 - not enough data (77 sentences)
+  #  - opus_ELRC-403-Rights_Arrested/v1 - not enough data (24 sentences)
+  #  - opus_ELRA-W0301/v1 - not enough data (21 sentences)
+  #  - opus_tldr-pages/v2023-08-29 - not enough data (9 sentences)
+  #  - opus_ELRC-417-Swedish_Work_Environ/v1 - not enough data (8 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data (0 sentences)
+  #  - mtdata_ELRC-romanian_literature-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-romanian_wikipedia-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-romanian_news-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-eir_spos-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-eir_newsletter-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-eir-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-ron - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-ron - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-ron - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-ron - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-eng-ron - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-ron - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-ron - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-ron - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-ron - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-ron - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-ron-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-ron_RO - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       55,607,023 sentences
+  - opus_OpenSubtitles/v2018 #                           50,693,226 sentences
+  - opus_ParaCrawl/v9 #                                  25,048,962 sentences
+  - opus_ELRC-EMEA/v1 #                                  13,648,577 sentences
+  - opus_CCAligned/v1 #                                  10,525,602 sentences
+  - opus_DGT/v2019 #                                      3,541,661 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   3,421,073 sentences
+  - opus_XLEnt/v1.2 #                                     3,337,016 sentences
+  - opus_TildeMODEL/v2018 #                               1,925,419 sentences
+  - opus_EMEA/v3 #                                          994,499 sentences
+  - opus_ELRC-2728-EMEA/v1 #                                783,742 sentences
+  - opus_WikiMatrix/v1 #                                    631,486 sentences
+  - opus_JRC-Acquis/v3.0 #                                  455,171 sentences
+  - opus_QED/v2.0a #                                        438,832 sentences
+  - opus_Europarl/v8 #                                      400,356 sentences
+  - opus_Wikipedia/v1.0 #                                   360,499 sentences
+  - opus_TED2020/v1 #                                       328,491 sentences
+  - opus_EUbookshop/v2 #                                    324,553 sentences
+  - opus_wikimedia/v20230407 #                              323,049 sentences
+  - opus_SETIMES/v2 #                                       213,047 sentences
+  - opus_NeuLab-TedTalks/v1 #                               196,122 sentences
+  - opus_TED2013/v1.1 #                                     158,483 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        153,650 sentences
+  - opus_Tanzil/v1 #                                        136,175 sentences
+  - opus_ELRC-492-Romanian_Wikipedia/v1 #                   132,230 sentences
+  - opus_ELRA-W0193/v1 #                                    132,229 sentences
+  - opus_KDE4/v2 #                                          114,741 sentences
+  - opus_ELRC-493-Romanian_news/v1 #                         98,099 sentences
+  - opus_ELRA-W0194/v1 #                                     98,098 sentences
+  - opus_ELITR-ECA/v1 #                                      92,826 sentences
+  - opus_bible-uedin/v1 #                                    62,195 sentences
+  - opus_PHP/v1 #                                            30,391 sentences
+  - opus_GNOME/v1 #                                          25,419 sentences
+  - opus_ELRC-3577-EUR_LEX_covid/v1 #                        23,183 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   23,183 sentences
+  - opus_ELRC-1177-EUIPO_2017/v1 #                           20,298 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                20,298 sentences
+  - opus_Tatoeba/v2023-04-12 #                               16,308 sentences
+  - opus_ELRC-wikipedia_health/v1 #                          13,252 sentences
+  - opus_ELRC-2887-EU_publications_medi/v1 #                 13,164 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,164 sentences
+  - opus_ELRC-930-studies_reports_stat/v1 #                  12,043 sentences
+  - opus_ELRA-W0270/v1 #                                     12,042 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            10,906 sentences
+  - opus_WMT-News/v2019 #                                     7,996 sentences
+  - opus_ELRC-3618-presscorner_covid/v1 #                     6,715 sentences
+  - opus_ELRC-435-Romanian_New_Crimina/v1 #                   6,496 sentences
+  - opus_ELRA-W0170/v1 #                                      6,495 sentences
+  - opus_ELRC-491-Romanian_literature/v1 #                    5,281 sentences
+  - opus_ELRA-W0192/v1 #                                      5,280 sentences
+  - opus_ELRC-1819-EIR/v1 #                                   4,994 sentences
+  - opus_GlobalVoices/v2018q4 #                               4,454 sentences
+  - opus_ELRC-1992-Rural_Development_Pr/v1 #                  4,186 sentences
+  - opus_ELRC-Rural_Development/v1 #                          4,186 sentences
+  - opus_ELRC-654-Romanian_Ombudsman_a/v1 #                   4,148 sentences
+  - opus_ELRA-W0206/v1 #                                      4,147 sentences
+  - opus_ELRC-1815-EIR_Newsletter/v1 #                        3,788 sentences
+  - opus_ELRC_3382/v1 #                                       3,674 sentences
+  - opus_ELRC-1814-EIR_SPOS/v1 #                              3,248 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,556 sentences
+  - opus_ELRC-3476-EC_EUROPA_covid/v1 #                       2,338 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,338 sentences
+  - opus_ELRC-3216-antibiotic/v1 #                            1,035 sentences
+  - opus_ELRC-antibiotic/v1 #                                 1,035 sentences
+  - opus_ELRC-3087-wikipedia_health/v1 #                        693 sentences
+  - opus_ELRC_2922/v1 #                                         692 sentences
+  - opus_ELRC-3305-EUROPARL_covid/v1 #                          546 sentences
+  - opus_ELRC-2750-vaccination/v1 #                             496 sentences
+  - opus_ELRC-vaccination/v1 #                                  496 sentences
+  - opus_ELRC_2923/v1 #                                         319 sentences
+  - mtdata_ELRC-rights_arrested-1-eng-ron
+  - mtdata_ELRC-swedish_work_environment-1-eng-ron
+  - mtdata_ELRC-romanian_new_criminal_procedure_code-1-eng-ron
+  - mtdata_ELRC-letter_rights_persons_arrested_or_detained-1-eng-ron
+  - mtdata_ELRC-romanian_ombudsman_archive-1-eng-ron
+  - mtdata_ELRC-studies_reports_statistical_culture_institute_cultural_research_training-1-eng-ron
+  - mtdata_ELRC-rural_development_programme_romania-1-eng-ron
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-ron
+  - mtdata_EU-eac_forms-1-eng-ron #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-ron #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-ron #                            ~389,297 sentences (44.0 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-ron #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Statmt-newsdev_enro-2016-eng-ron #             ~402,756 sentences (45.5 MB)
+  - mtdata_Statmt-newsdev_roen-2016-ron-eng #             ~402,756 sentences (45.5 MB)
+  - mtdata_Tilde-eesc-2017-eng-ron #                    ~1,026,056 sentences (115.9 MB)
+  - mtdata_Tilde-ema-2016-eng-ron #                       ~229,130 sentences (25.9 MB)
+  - mtdata_Tilde-ecb-2017-eng-ron #                         ~1,778 sentences (200.9 kB)
+  - mtdata_Tilde-rapid-2016-eng-ron #                     ~196,150 sentences (22.2 MB)
+  - mtdata_Tilde-worldbank-1-eng-ron #                      ~6,413 sentences (724.7 kB)
+
+  # The source (ro) monolingual data contains:
+  #   ~24,920,348 sentences
+  mono-src:
+  - news-crawl_news.2015  #          ~1,088,495 sentences (123M)
+  - news-crawl_news.2016 #          ~2,061,946 sentences (233M)
+  - news-crawl_news.2017 #          ~2,247,787 sentences (254M)
+  - news-crawl_news.2018 #          ~1,345,132 sentences (152M)
+  - news-crawl_news.2019 #          ~3,283,185 sentences (371M)
+  - news-crawl_news.2020 #          ~3,982,300 sentences (450M)
+  - news-crawl_news.2021 #          ~3,353,982 sentences (379M)
+  - news-crawl_news.2022 #          ~3,831,858 sentences (433M)
+  - news-crawl_news.2023 #          ~3,725,663 sentences (421M)
+
+  # The target (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/ru-en-spring-2024.yml b/configs/ru-en-spring-2024.yml
new file mode 100644
index 000000000..4c891ec35
--- /dev/null
+++ b/configs/ru-en-spring-2024.yml
@@ -0,0 +1,181 @@
+# The initial configuration was generated using:
+# task config-generator -- ru en --name spring-2024 --fast
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/e880dbefe77f3428aed2d8ccc4f840fe347b025b/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: ru
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: one-stage
+  pretrained-models:
+    train-backwards:
+      urls:
+      - https://storage.googleapis.com/releng-translations-dev/models/ru-en/better-teacher/student
+      mode: use
+      type: default
+datasets:
+
+  # Skipped test/devtest datasets:
+  #  - mtedx/valid - variant dataset
+  #  - mtedx/test - variant dataset
+  #  - wmt20/tworefs - variant dataset
+  #  - wmt18/test-ts - variant dataset
+  #  - wmt14/full - variant dataset
+  devtest:
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt19
+  - sacrebleu_aug-mix_wmt17
+  - sacrebleu_aug-mix_wmt15
+  - sacrebleu_aug-mix_wmt13
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt20
+  - sacrebleu_wmt18
+  - sacrebleu_wmt16
+  - sacrebleu_wmt14
+
+  # The training data contains:
+  #   250,111,081 sentences
+  # 
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored dataset (139,937,785 sentences)
+  #  - opus_GNOME/v1 - not enough data (150 sentences)
+  #  - opus_ELRC-3855-SWPS_University_Soci/v1 - not enough data (109 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data (0 sentences)
+  #  - opus_WikiTitles/v3 - ignored dataset (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-eng-rus - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-rus - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-eng-rus - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-rus - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_test-1-eng-rus - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_dev-1-eng-rus - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-1_bonus-eng-rus - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-14-eng-rus - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-15-eng-rus - duplicate with opus
+  #  - mtdata_Statmt-news_commentary-16-eng-rus - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-rus_RU - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                      139,937,785 sentences
+  - opus_OpenSubtitles/v2018 #                           25,910,105 sentences
+  - opus_UNPC/v1.0 #                                     25,173,398 sentences
+  - opus_CCAligned/v1 #                                  13,850,305 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                  13,565,182 sentences
+  - opus_MultiUN/v1 #                                    11,654,416 sentences
+  - opus_XLEnt/v1.2 #                                     7,890,088 sentences
+  - opus_ParaCrawl/v9 #                                   5,378,016 sentences
+  - opus_WikiMatrix/v1 #                                  1,661,909 sentences
+  - opus_Tanzil/v1 #                                      1,067,840 sentences
+  - opus_Wikipedia/v1.0 #                                   572,717 sentences
+  - opus_QED/v2.0a #                                        563,700 sentences
+  - opus_wikimedia/v20230407 #                              541,583 sentences
+  - opus_Tatoeba/v2023-04-12 #                              540,675 sentences
+  - opus_TED2020/v1 #                                       390,015 sentences
+  - opus_News-Commentary/v16 #                              265,809 sentences
+  - opus_NeuLab-TedTalks/v1 #                               221,999 sentences
+  - opus_KDE4/v2 #                                          180,793 sentences
+  - opus_GlobalVoices/v2018q4 #                             170,351 sentences
+  - opus_TED2013/v1.1 #                                     133,660 sentences
+  - opus_ELRC-5183-SciPar_Ukraine/v1 #                      126,585 sentences
+  - opus_infopankki/v1 #                                     75,305 sentences
+  - opus_bible-uedin/v1 #                                    62,195 sentences
+  - opus_EUbookshop/v2 #                                     49,830 sentences
+  - opus_WMT-News/v2019 #                                    36,637 sentences
+  - opus_PHP/v1 #                                            30,064 sentences
+  - opus_Books/v1 #                                          17,496 sentences
+  - opus_TildeMODEL/v2018 #                                  10,977 sentences
+  - opus_MDN_Web_Docs/v2023-09-25 #                           8,134 sentences
+  - opus_ada83/v1 #                                           4,122 sentences
+  - opus_ELRC-3075-wikipedia_health/v1 #                      4,073 sentences
+  - opus_ELRC-wikipedia_health/v1 #                           4,073 sentences
+  - opus_ELRC_2922/v1 #                                       4,072 sentences
+  - opus_tico-19/v2020-10-28 #                                3,071 sentences
+  - opus_ELRC-5067-SciPar/v1 #                                3,064 sentences
+  - opus_tldr-pages/v2023-08-29 #                             1,037 sentences
+  - mtdata_Statmt-commoncrawl_wmt13-1-rus-eng
+  - mtdata_Statmt-news_commentary_wmt18-13-rus-eng
+  - mtdata_Statmt-wiki_titles-1-rus-eng
+  - mtdata_Statmt-wiki_titles-2-rus-eng
+  - mtdata_Tilde-airbaltic-1-eng-rus
+  - mtdata_Tilde-czechtourism-1-eng-rus
+  - mtdata_Tilde-worldbank-1-eng-rus
+  - mtdata_UN-un_dev-1-eng-rus
+  - mtdata_UN-un_test-1-eng-rus
+
+  # The source (ru) monolingual data contains:
+  #   ~90,385,836 sentences
+  mono-src:
+  - news-crawl_news.2008  #             ~19,469 sentences (2.2M)
+  - news-crawl_news.2009 #             ~47,787 sentences (5.4M)
+  - news-crawl_news.2011 #          ~4,876,106 sentences (551M)
+  - news-crawl_news.2012 #          ~5,079,646 sentences (574M)
+  - news-crawl_news.2013 #          ~7,327,433 sentences (828M)
+  - news-crawl_news.2014 #          ~6,194,690 sentences (700M)
+  - news-crawl_news.2015 #          ~5,433,628 sentences (614M)
+  - news-crawl_news.2016 #          ~3,716,814 sentences (420M)
+  - news-crawl_news.2017 #          ~4,451,327 sentences (503M)
+  - news-crawl_news.2018 #          ~4,539,823 sentences (513M)
+  - news-crawl_news.2019 #          ~6,955,752 sentences (786M)
+  - news-crawl_news.2020 #          ~8,849,557 sentences (1.0G)
+  - news-crawl_news.2021 #          ~8,115,044 sentences (917M)
+  - news-crawl_news.2022 #          ~8,849,557 sentences (1.0G)
+  - news-crawl_news.2023 #         ~15,929,203 sentences (1.8G)
+
+  # The target (en) monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/sk-en-spring-2024.yml b/configs/sk-en-spring-2024.yml
new file mode 100644
index 000000000..30d52d8ae
--- /dev/null
+++ b/configs/sk-en-spring-2024.yml
@@ -0,0 +1,180 @@
+# The initial configuration was generated using:
+# task config-generator -- sk en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: sk
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-slk
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   111,168,672 sentences (sum of the exact opus_* counts below; mtdata_* entries are not included)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (38,096,241 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_ELRC-3076-wikipedia_health/v1 - not enough data  (134 sentences)
+  #  - opus_ELRC-wikipedia_health/v1 - not enough data  (134 sentences)
+  #  - opus_ELRC_2922/v1 - not enough data  (133 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-culture_slovak-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-justice_slovak-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-slk - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-eng-slk - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-slk - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-slk - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-slk - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-slk - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-slk - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-slk - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-slk - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-slk-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-slk_SK - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-eng-slk - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       38,096,241 sentences
+  - opus_ParaCrawl/v9 #                                  22,902,149 sentences
+  - opus_ELRC-EMEA/v1 #                                  12,032,452 sentences
+  - opus_OpenSubtitles/v2018 #                            8,850,871 sentences
+  - opus_ELRC-4154-NTEU_TierA/v1 #                        7,922,512 sentences
+  - opus_CCAligned/v1 #                                   6,938,181 sentences
+  - opus_DGT/v2019 #                                      5,118,830 sentences
+  - opus_XLEnt/v1.2 #                                     2,594,162 sentences
+  - opus_TildeMODEL/v2018 #                               2,190,889 sentences
+  - opus_EMEA/v3 #                                        1,054,178 sentences
+  - opus_ELRC-2721-EMEA/v1 #                                780,098 sentences
+  - opus_Europarl/v8 #                                      639,958 sentences
+  - opus_EUbookshop/v2 #                                    452,097 sentences
+  - opus_ELITR-ECA/v1 #                                     294,356 sentences
+  - opus_WikiMatrix/v1 #                                    178,985 sentences
+  - opus_QED/v2.0a #                                        173,727 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        142,656 sentences
+  - opus_ECB/v1 #                                           122,131 sentences
+  - opus_TED2020/v1 #                                       106,067 sentences
+  - opus_KDE4/v2 #                                          105,425 sentences
+  - opus_NeuLab-TedTalks/v1 #                                67,607 sentences
+  - opus_bible-uedin/v1 #                                    62,159 sentences
+  - opus_ELRC-5067-SciPar/v1 #                               60,468 sentences
+  - opus_JRC-Acquis/v3.0 #                                   35,744 sentences
+  - opus_PHP/v1 #                                            31,173 sentences
+  - opus_ELRC-3570-EUR_LEX_covid/v1 #                        22,479 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,479 sentences
+  - opus_wikimedia/v20230407 #                               18,819 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            16,369 sentences
+  - opus_ELRC-1179-EUIPO_2017/v1 #                           16,313 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                16,313 sentences
+  - opus_ELRC-2880-EU_publications_medi/v1 #                 12,927 sentences
+  - opus_ELRC-EU_publications/v1 #                           12,927 sentences
+  - opus_EUconst/v1 #                                        10,119 sentences
+  - opus_ELRC-1072-annual_reports_immig/v1 #                  8,041 sentences
+  - opus_ELRA-W0136/v1 #                                      8,040 sentences
+  - opus_ELRC-3611-presscorner_covid/v1 #                     6,982 sentences
+  - opus_ELRC-1073-annual_reports_Slova/v1 #                  6,008 sentences
+  - opus_ELRA-W0137/v1 #                                      6,007 sentences
+  - opus_ELRC-1074-annual_reports_Stati/v1 #                  5,614 sentences
+  - opus_ELRC_3382/v1 #                                       3,624 sentences
+  - opus_ELRC-488-Justice_Slovak/v1 #                         2,896 sentences
+  - opus_ELRA-W0189/v1 #                                      2,895 sentences
+  - opus_ELRC-487-Culture_Slovak/v1 #                         2,610 sentences
+  - opus_ELRA-W0188/v1 #                                      2,609 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,526 sentences
+  - opus_ELRC-3469-EC_EUROPA_covid/v1 #                       2,409 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,409 sentences
+  - opus_ELRC-3209-antibiotic/v1 #                            1,010 sentences
+  - opus_ELRC-antibiotic/v1 #                                 1,010 sentences
+  - opus_ELRC-3298-EUROPARL_covid/v1 #                          653 sentences
+  - opus_ELRC-2745-vaccination/v1 #                             510 sentences
+  - opus_ELRC-vaccination/v1 #                                  510 sentences
+  - opus_ELRC_2923/v1 #                                         448 sentences
+  - mtdata_ELRC-annual_reports_immigration_asylum_policies_emn_contact_point_slovak-1-eng-slk
+  - mtdata_ELRC-annual_reports_slovak_centre_human_rights-1-eng-slk
+  - mtdata_ELRC-annual_reports_statistical_slovak-1-eng-slk
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-slk
+  - mtdata_ELRC-wikipedia_health-1-eng-slk
+  - mtdata_ELRC-nteu_tierb-1-eng-slk
+  - mtdata_EU-eac_forms-1-eng-slk #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-slk #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-slk #                            ~548,757 sentences (62.0 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-slk #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Tilde-eesc-2017-eng-slk #                    ~1,269,685 sentences (143.5 MB)
+  - mtdata_Tilde-ema-2016-eng-slk #                       ~238,237 sentences (26.9 MB)
+  - mtdata_Tilde-rapid-2016-eng-slk #                     ~214,164 sentences (24.2 MB)
+
+  # The source-side monolingual data (mono-src) contains:
+  #   ~0 sentences
+  mono-src: []
+
+  # The target-side monolingual data (mono-trg) contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/sl-en-spring-2024.yml b/configs/sl-en-spring-2024.yml
new file mode 100644
index 000000000..1b3a5613d
--- /dev/null
+++ b/configs/sl-en-spring-2024.yml
@@ -0,0 +1,182 @@
+# The initial configuration was generated using:
+# task config-generator -- sl en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: sl
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-slv
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   89,057,699 sentences (sum of the exact opus_* counts below; mtdata_* entries are not included)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (27,406,782 sentences)
+  #  - opus_MultiMaCoCu/v2 - ignored datasets (1,875,517 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-euipo_2017-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-slv - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-slv - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-slv - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-slv - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-slv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-slv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-slv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-slv - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-slv - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-slv-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-slv_SI - duplicate with opus
+  #  - mtdata_Tilde-ecb-2017-eng-slv - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       27,406,782 sentences
+  - opus_OpenSubtitles/v2018 #                           19,641,457 sentences
+  - opus_ELRC-EMEA/v1 #                                  13,288,992 sentences
+  - opus_ParaCrawl/v9 #                                   9,516,259 sentences
+  - opus_DGT/v2019 #                                      5,125,455 sentences
+  - opus_CCAligned/v1 #                                   4,366,555 sentences
+  - opus_TildeMODEL/v2018 #                               2,048,216 sentences
+  - opus_MaCoCu/v2 #                                      1,875,518 sentences
+  - opus_EMEA/v3 #                                        1,045,041 sentences
+  - opus_XLEnt/v1.2 #                                       861,509 sentences
+  - opus_ELRC-2727-EMEA/v1 #                                766,139 sentences
+  - opus_Europarl/v8 #                                      624,803 sentences
+  - opus_EUbookshop/v2 #                                    405,653 sentences
+  - opus_WikiMatrix/v1 #                                    318,028 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              300,017 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        150,090 sentences
+  - opus_Wikipedia/v1.0 #                                   140,124 sentences
+  - opus_KDE4/v2 #                                          119,645 sentences
+  - opus_ELITR-ECA/v1 #                                     102,934 sentences
+  - opus_ECB/v1 #                                            89,634 sentences
+  - opus_QED/v2.0a #                                         79,196 sentences
+  - opus_wikimedia/v20230407 #                               73,612 sentences
+  - opus_ELRC-490-Secretariat_General_/v1 #                  63,070 sentences
+  - opus_ELRA-W0191/v1 #                                     63,069 sentences
+  - opus_bible-uedin/v1 #                                    62,195 sentences
+  - opus_ELRC-489-Secretariat_General_/v1 #                  55,185 sentences
+  - opus_ELRA-W0190/v1 #                                     55,184 sentences
+  - opus_JRC-Acquis/v3.0 #                                   53,390 sentences
+  - opus_TED2020/v1 #                                        44,340 sentences
+  - opus_PHP/v1 #                                            30,550 sentences
+  - opus_NeuLab-TedTalks/v1 #                                22,856 sentences
+  - opus_ELRC-3576-EUR_LEX_covid/v1 #                        22,381 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,381 sentences
+  - opus_ELRC-1180-EUIPO_2017/v1 #                           19,767 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                19,767 sentences
+  - opus_ELRC-wikipedia_health/v1 #                          17,519 sentences
+  - opus_ELRC-antibiotic/v1 #                                16,166 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            15,269 sentences
+  - opus_TED2013/v1.1 #                                      14,960 sentences
+  - opus_ELRC-2886-EU_publications_medi/v1 #                 13,209 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,209 sentences
+  - opus_ELRC-924-statistical_reports_/v1 #                  11,860 sentences
+  - opus_ELRC-statistical_reports/v1 #                       11,860 sentences
+  - opus_ELRA-W0267/v1 #                                     11,859 sentences
+  - opus_EUconst/v1 #                                         8,807 sentences
+  - opus_GNOME/v1 #                                           8,070 sentences
+  - opus_ELRC-3617-presscorner_covid/v1 #                     6,995 sentences
+  - opus_ELRC-923-chapters_Youth_2010/v1 #                    5,866 sentences
+  - opus_Tatoeba/v2023-04-12 #                                4,302 sentences
+  - opus_ELRC_3382/v1 #                                       3,633 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,546 sentences
+  - opus_ELRC-3475-EC_EUROPA_covid/v1 #                       2,534 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  2,534 sentences
+  - opus_ELRC-2328-Agriculture_Forestry/v1 #                  1,551 sentences
+  - opus_ELRC-3215-antibiotic/v1 #                              986 sentences
+  - opus_ELRC-3085-wikipedia_health/v1 #                        960 sentences
+  - opus_ELRC_2922/v1 #                                         959 sentences
+  - opus_ELRC-3304-EUROPARL_covid/v1 #                          816 sentences
+  - opus_ELRC-2737-vaccination/v1 #                             492 sentences
+  - opus_ELRC-vaccination/v1 #                                  492 sentences
+  - opus_ELRC_2923/v1 #                                         451 sentences
+  - mtdata_ELRC-secretariat_general_part1-1-eng-slv
+  - mtdata_ELRC-secretariat_general_part2-1-eng-slv
+  - mtdata_ELRC-chapters_youth_2010_social_profile_young_people_slovenia_publication-1-eng-slv
+  - mtdata_ELRC-statistical_reports_statistical_slovenia-1-eng-slv
+  - mtdata_ELRC-agriculture_forestry_food_slovenia-1-eng-slv
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-slv
+  - mtdata_EU-eac_forms-1-eng-slv #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-slv #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-slv #                            ~539,490 sentences (61.0 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-slv #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Tilde-eesc-2017-eng-slv #                    ~1,116,707 sentences (126.2 MB)
+  - mtdata_Tilde-ema-2016-eng-slv #                       ~223,681 sentences (25.3 MB)
+  - mtdata_Tilde-rapid-2016-eng-slv #                     ~203,695 sentences (23.0 MB)
+
+  # The source-side monolingual data (mono-src) contains:
+  #   ~0 sentences
+  mono-src: []
+
+  # The target-side monolingual data (mono-trg) contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/sr-en-spring-2024.yml b/configs/sr-en-spring-2024.yml
new file mode 100644
index 000000000..4d07e66e7
--- /dev/null
+++ b/configs/sr-en-spring-2024.yml
@@ -0,0 +1,131 @@
+# The initial configuration was generated using:
+# task config-generator -- sr en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: sr
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-srp
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   78,565,711 sentences (sum of the exact opus_* counts below; mtdata_* entries are not included)
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (26,510,872 sentences)
+  #  - opus_MultiHPLT/v1.1 - ignored datasets (3,904,384 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_ELRC-416-Swedish_Social_Secur/v1 - not enough data  (114 sentences)
+  #  - opus_ELRC-416-Swedish_Social_Secur/v1 - not enough data  (113 sentences) [NOTE(review): same name as the previous entry but a different count; confirm the dataset name]
+  #  - opus_tldr-pages/v2023-08-29 - not enough data  (26 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-eng-srp - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-srp - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-srp - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-srp_RS - duplicate with opus
+  train:
+  - opus_OpenSubtitles/v2018  #                           42,635,098 sentences
+  - opus_NLLB/v1 #                                       26,510,872 sentences
+  - opus_HPLT/v1.1 #                                      3,904,423 sentences
+  - opus_CCAligned/v1 #                                   1,992,692 sentences
+  - opus_XLEnt/v1.2 #                                     1,474,447 sentences
+  - opus_WikiMatrix/v1 #                                    395,569 sentences
+  - opus_GoURMET/v1 #                                       329,004 sentences
+  - opus_QED/v2.0a #                                        284,942 sentences
+  - opus_TED2020/v1 #                                       260,966 sentences
+  - opus_SETIMES/v2 #                                       225,169 sentences
+  - opus_wikimedia/v20230407 #                              217,199 sentences
+  - opus_NeuLab-TedTalks/v1 #                               152,477 sentences
+  - opus_bible-uedin/v1 #                                    62,131 sentences
+  - opus_KDE4/v2 #                                           60,827 sentences
+  - opus_Tatoeba/v2023-04-12 #                               21,760 sentences
+  - opus_GlobalVoices/v2018q4 #                              20,309 sentences
+  - opus_ELRC-wikipedia_health/v1 #                          12,707 sentences
+  - opus_TildeMODEL/v2018 #                                   2,024 sentences
+  - opus_EUbookshop/v2 #                                      1,608 sentences
+  - opus_ELRC-3041-wikipedia_health/v1 #                        744 sentences
+  - opus_ELRC_2922/v1 #                                         743 sentences
+  - mtdata_ELRC-swedish_social_security-1-eng-srp
+  - mtdata_Neulab-tedtalks_test-1-eng-srp #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Tilde-worldbank-1-eng-srp #                      ~2,533 sentences (286.3 kB)
+
+  # The source-side monolingual data (mono-src) contains:
+  #   ~35,920,209 sentences
+  mono-src:
+  - news-crawl_news.2008  #              ~3,522 sentences (398K)
+  - news-crawl_news.2009 #             ~18,584 sentences (2.1M)
+  - news-crawl_news.2010 #              ~9,734 sentences (1.1M)
+  - news-crawl_news.2011 #              ~2,530 sentences (286K)
+  - news-crawl_news.2018 #             ~18,584 sentences (2.1M)
+  - news-crawl_news.2019 #          ~1,929,203 sentences (218M)
+  - news-crawl_news.2020 #          ~5,619,469 sentences (635M)
+  - news-crawl_news.2021 #          ~8,849,557 sentences (1.0G)
+  - news-crawl_news.2022 #          ~9,734,513 sentences (1.1G)
+  - news-crawl_news.2023 #          ~9,734,513 sentences (1.1G)
+
+  # The target-side monolingual data (mono-trg) contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/sv-en-spring-2024.yml b/configs/sv-en-spring-2024.yml
new file mode 100644
index 000000000..bd44178e6
--- /dev/null
+++ b/configs/sv-en-spring-2024.yml
@@ -0,0 +1,239 @@
+# The initial configuration was generated using:
+# task config-generator -- sv en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: sv
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Lindat-khresmoi_summary_dev-2-eng-swe
+  - mtdata_Neulab-tedtalks_dev-1-eng-swe
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   211,400,324 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (77,008,059 sentences)
+  #  - opus_RF/v1 - not enough data  (180 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-swedish_labour_part2-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-swedish_labour_part1-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-swedish_food-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.norden.org-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.sida.se-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.visitestonia.com-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.vtv.fi-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-valtioneuvosto.fi-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-vnk.fi-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-euipo_2017-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.turku.fi-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-www.vero.fi-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-emea-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-vaccination-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-wikipedia_health-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-antibiotic-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-europarl_covid-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-ec_europa_covid-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-eur_lex_covid-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-presscorner_covid-1-eng-swe - duplicate with opus
+  #  - mtdata_ELRC-nteu_tiera-1-eng-swe - duplicate with opus
+  #  - mtdata_EU-ecdc-1-eng-swe - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-swe - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-eng-swe - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-swe - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-6-eng-swe - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-7.1-eng-swe - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-8-eng-swe - duplicate with opus
+  #  - mtdata_ParaCrawl-paracrawl-9-eng-swe - duplicate with opus
+  #  - mtdata_Statmt-europarl-7-swe-eng - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-swe_SE - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       77,008,059 sentences
+  - opus_ParaCrawl/v9 #                                  49,110,322 sentences
+  - opus_OpenSubtitles/v2018 #                           17,660,152 sentences
+  - opus_ELRC-4268-NTEU_TierA/v1 #                       12,737,597 sentences
+  - opus_CCAligned/v1 #                                  12,544,114 sentences
+  - opus_ELRC-EMEA/v1 #                                  12,083,941 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   8,058,690 sentences
+  - opus_DGT/v2019 #                                      5,139,521 sentences
+  - opus_XLEnt/v1.2 #                                     3,674,011 sentences
+  - opus_TildeMODEL/v2018 #                               3,234,207 sentences
+  - opus_EUbookshop/v2 #                                  1,915,479 sentences
+  - opus_Europarl/v8 #                                    1,892,723 sentences
+  - opus_EMEA/v3 #                                        1,086,217 sentences
+  - opus_JRC-Acquis/v3.0 #                                  792,924 sentences
+  - opus_ELRC-2725-EMEA/v1 #                                759,846 sentences
+  - opus_ELRC-5067-SciPar/v1 #                              670,816 sentences
+  - opus_WikiMatrix/v1 #                                    546,289 sentences
+  - opus_ELITR-ECA/v1 #                                     389,808 sentences
+  - opus_KDE4/v2 #                                          232,485 sentences
+  - opus_QED/v2.0a #                                        171,126 sentences
+  - opus_ELRC-presscorner_covid/v1 #                        147,973 sentences
+  - opus_Tanzil/v1 #                                        127,493 sentences
+  - opus_ELRC-Swedish_Migration/v1 #                        124,398 sentences
+  - opus_TED2020/v1 #                                       120,718 sentences
+  - opus_ELRC-www.turku.fi/v1 #                             107,773 sentences
+  - opus_NeuLab-TedTalks/v1 #                                69,332 sentences
+  - opus_wikimedia/v20230407 #                               63,135 sentences
+  - opus_bible-uedin/v1 #                                    62,137 sentences
+  - opus_infopankki/v1 #                                     51,749 sentences
+  - opus_ELRC-1770-valtioneuvosto.fi/v1 #                    49,084 sentences
+  - opus_ELRC-valtioneuvosto.fi/v1 #                         49,084 sentences
+  - opus_ELRC-1133-www.vtv.fi/v1 #                           46,501 sentences
+  - opus_ELRC-www.vtv.fi/v1 #                                46,501 sentences
+  - opus_ELRC-734-www.norden.org/v1 #                        37,763 sentences
+  - opus_ELRC-www.norden.org/v1 #                            37,763 sentences
+  - opus_ELRC-1772-vnk.fi/v1 #                               33,627 sentences
+  - opus_ELRC-vnk.fi/v1 #                                    33,627 sentences
+  - opus_WikiSource/v1 #                                     33,283 sentences
+  - opus_ELRC-817-Swedish_Audit_Riksre/v1 #                  30,352 sentences
+  - opus_PHP/v1 #                                            30,198 sentences
+  - opus_Tatoeba/v2023-04-12 #                               27,050 sentences
+  - opus_ELRC-3574-EUR_LEX_covid/v1 #                        22,445 sentences
+  - opus_ELRC-EUR_LEX/v1 #                                   22,445 sentences
+  - opus_ELRC-2037-www.vero.fi/v1 #                          22,317 sentences
+  - opus_ELRC-www.vero.fi/v1 #                               22,317 sentences
+  - opus_ELRC-2026-EUIPO_2017/v1 #                           16,947 sentences
+  - opus_ELRC-EUIPO_2017/v1 #                                16,947 sentences
+  - opus_ELRC-EUROPARL_covid/v1 #                            15,681 sentences
+  - opus_ELRC-antibiotic/v1 #                                15,452 sentences
+  - opus_ELRC-1131-www.visitestonia.com/v1 #                 14,335 sentences
+  - opus_ELRC-www.visitestonia.com/v1 #                      14,335 sentences
+  - opus_ELRC-2884-EU_publications_medi/v1 #                 13,100 sentences
+  - opus_ELRC-EU_publications/v1 #                           13,100 sentences
+  - opus_ELRC-802-Swedish_Competition_/v1 #                  11,374 sentences
+  - opus_ELRC-wikipedia_health/v1 #                          10,395 sentences
+  - opus_ELRC-928-Annual_Reports_Swedi/v1 #                  10,227 sentences
+  - opus_EUconst/v1 #                                         9,954 sentences
+  - opus_ELRC-2033-www.turku.fi/v1 #                          9,706 sentences
+  - opus_GlobalVoices/v2018q4 #                               8,793 sentences
+  - opus_ELRC-829-Swedish_Migration_Bo/v1 #                   8,366 sentences
+  - opus_ELRA-W0239/v1 #                                      8,365 sentences
+  - opus_ELRC-417-Swedish_Work_Environ/v1 #                   7,475 sentences
+  - opus_ELRC-3615-presscorner_covid/v1 #                     6,856 sentences
+  - opus_ELRC-744-Finnish_Information_/v1 #                   6,819 sentences
+  - opus_ELRC-Finnish_Information/v1 #                        6,819 sentences
+  - opus_ELRA-W0222/v1 #                                      6,818 sentences
+  - opus_ELRC_3382/v1 #                                       3,760 sentences
+  - opus_Books/v1 #                                           3,095 sentences
+  - opus_ELRC-Swedish_Labour/v1 #                             2,778 sentences
+  - opus_ECDC/v2016-03-16 #                                   2,528 sentences
+  - opus_ELRC-1013-Sweden_a_Pocket/v1 #                       2,200 sentences
+  - opus_ELRA-W0130/v1 #                                      2,199 sentences
+  - opus_ELRC-712-Social_Insurance_Frs/v1 #                   1,953 sentences
+  - opus_ELRA-W0213/v1 #                                      1,952 sentences
+  - opus_ELRC-3473-EC_EUROPA_covid/v1 #                       1,858 sentences
+  - opus_ELRC-EC_EUROPA/v1 #                                  1,858 sentences
+  - opus_ELRC-401-Swedish_Labour_Part2/v1 #                   1,768 sentences
+  - opus_ELRC-929-www.sida.se/v1 #                            1,545 sentences
+  - opus_ELRC-823-Swedish_Swedish_Crim/v1 #                   1,503 sentences
+  - opus_ELRC-416-Swedish_Social_Secur/v1 #                   1,447 sentences
+  # NOTE(review): dropped duplicate opus_ELRC-416-Swedish_Social_Secur/v1 entry (1,446 sentences); confirm intent
+  - opus_ELRC-436-Swedish_Food/v1 #                           1,147 sentences
+  - opus_ELRA-W0305/v1 #                                      1,146 sentences
+  - opus_ELRC-406-Swedish_Labour_Part1/v1 #                   1,011 sentences
+  - opus_ELRC-3213-antibiotic/v1 #                              953 sentences
+  - opus_ELRC-830-Swedish_Economic_Reg/v1 #                     949 sentences
+  - opus_ELRC-3302-EUROPARL_covid/v1 #                          844 sentences
+  - opus_tldr-pages/v2023-08-29 #                               566 sentences
+  - opus_ELRC-3082-wikipedia_health/v1 #                        535 sentences
+  - opus_ELRC_2922/v1 #                                         534 sentences
+  - opus_ELRC_2923/v1 #                                         499 sentences
+  - opus_ELRC-2752-vaccination/v1 #                             497 sentences
+  - opus_ELRC-vaccination/v1 #                                  497 sentences
+  - mtdata_ELRC-swedish_social_security-1-eng-swe
+  - mtdata_ELRC-swedish_work_environment-1-eng-swe
+  - mtdata_ELRC-social_insurance_försäkringskassan-1-eng-swe
+  - mtdata_ELRC-finnish_information_bank-1-eng-swe
+  - mtdata_ELRC-swedish_competition_authority_konkurrensverket-1-eng-swe
+  - mtdata_ELRC-swedish_audit_riksrevisionen-1-eng-swe
+  - mtdata_ELRC-swedish_swedish_crime_victim_compensation_support_authority-1-eng-swe
+  - mtdata_ELRC-swedish_migration_board_migrationsverket-1-eng-swe
+  - mtdata_ELRC-swedish_economic_regional_growth_tillväxtverket-1-eng-swe
+  - mtdata_ELRC-annual_reports_swedish_pension_system-1-eng-swe
+  - mtdata_ELRC-sweden_a_pocket_guide_book-1-eng-swe
+  - mtdata_ELRC-eu_publications_medical_v2-1-eng-swe
+  - mtdata_ELRC-nteu_tierb-1-eng-swe
+  - mtdata_EU-eac_forms-1-eng-swe #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-swe #                    ~31,162 sentences (3.5 MB)
+  - mtdata_EU-dcep-1-eng-swe #                            ~980,674 sentences (110.8 MB)
+  - mtdata_Lindat-khresmoi_summary_test-2-eng-swe #        ~11,808 sentences (1.3 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-swe #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Statmt-dcep_wmt17-1-swe-eng #                ~1,137,607 sentences (128.5 MB)
+  - mtdata_Statmt-books_wmt17-1-swe-eng #                   ~2,797 sentences (316.2 kB)
+  - mtdata_Tilde-eesc-2017-eng-swe #                    ~1,798,328 sentences (203.2 MB)
+  - mtdata_Tilde-ema-2016-eng-swe #                       ~215,912 sentences (24.4 MB)
+  - mtdata_Tilde-ecb-2017-eng-swe #                         ~3,314 sentences (374.5 kB)
+  - mtdata_Tilde-rapid-2016-eng-swe #                     ~400,648 sentences (45.3 MB)
+
+  # The monolingual data contains:
+  #   ~0 sentences
+  mono-src: []
+
+  # The monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/tr-en-spring-2024.yml b/configs/tr-en-spring-2024.yml
new file mode 100644
index 000000000..e56b22c38
--- /dev/null
+++ b/configs/tr-en-spring-2024.yml
@@ -0,0 +1,144 @@
+# The initial configuration was generated using:
+# task config-generator -- tr en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: tr
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-tur
+  - flores_aug-mix_dev
+  - sacrebleu_aug-mix_wmt18/test-ts
+  - sacrebleu_aug-mix_wmt16
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+  - sacrebleu_wmt18
+  - sacrebleu_wmt17
+  - sacrebleu_wmt16/dev
+
+  # The training data contains:
+  #   121,323,758 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (47,045,956 sentences)
+  #  - opus_MultiMaCoCu/v2 - ignored datasets (1,646,740 sentences)
+  #  - opus_GNOME/v1 - not enough data  (150 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-eng-tur - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-tur - duplicate with opus
+  #  - mtdata_LinguaTools-wikititles-2014-eng-tur - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-tur - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-tur_TR - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       47,045,956 sentences
+  - opus_OpenSubtitles/v2018 #                           44,986,121 sentences
+  - opus_CCAligned/v1 #                                  13,650,311 sentences
+  - opus_LinguaTools-WikiTitles/v2014 #                   3,915,248 sentences
+  - opus_XLEnt/v1.2 #                                     3,809,464 sentences
+  - opus_MaCoCu/v2 #                                      1,646,741 sentences
+  - opus_GoURMET/v1 #                                     1,308,307 sentences
+  - opus_Tanzil/v1 #                                      1,189,967 sentences
+  - opus_Tatoeba/v2023-04-12 #                              676,920 sentences
+  - opus_wikimedia/v20230407 #                              668,099 sentences
+  - opus_QED/v2.0a #                                        482,964 sentences
+  - opus_WikiMatrix/v1 #                                    477,736 sentences
+  - opus_TED2020/v1 #                                       378,033 sentences
+  - opus_SETIMES/v2 #                                       207,678 sentences
+  - opus_NeuLab-TedTalks/v1 #                               195,641 sentences
+  - opus_Wikipedia/v1.0 #                                   159,979 sentences
+  - opus_KDE4/v2 #                                          153,438 sentences
+  - opus_TED2013/v1.1 #                                     137,028 sentences
+  - opus_bible-uedin/v1 #                                    60,411 sentences
+  - opus_infopankki/v1 #                                     44,030 sentences
+  - opus_Bianet/v1 #                                         34,770 sentences
+  - opus_PHP/v1 #                                            32,713 sentences
+  - opus_EUbookshop/v2 #                                     23,706 sentences
+  - opus_WMT-News/v2019 #                                    20,016 sentences
+  - opus_GlobalVoices/v2018q4 #                               7,838 sentences
+  - opus_ELRC-3057-wikipedia_health/v1 #                      2,368 sentences
+  - opus_ELRC-wikipedia_health/v1 #                           2,368 sentences
+  - opus_ELRC_2922/v1 #                                       2,367 sentences
+  - opus_tldr-pages/v2023-08-29 #                             1,956 sentences
+  - opus_TildeMODEL/v2018 #                                   1,584 sentences
+  - mtdata_EU-eac_forms-1-eng-tur #                        ~31,162 sentences (3.5 MB)
+  - mtdata_EU-eac_reference-1-eng-tur #                    ~31,162 sentences (3.5 MB)
+  - mtdata_Neulab-tedtalks_test-1-eng-tur #             ~3,117,009 sentences (352.2 MB)
+  - mtdata_Statmt-newsdev_tren-2016-tur-eng #             ~402,756 sentences (45.5 MB)
+  - mtdata_Statmt-newsdev_entr-2016-eng-tur #             ~402,756 sentences (45.5 MB)
+  - mtdata_Tilde-worldbank-1-eng-tur #                      ~1,827 sentences (206.5 kB)
+
+  # The monolingual data contains:
+  #   ~20,230,124 sentences
+  mono-src:
+  - news-crawl_news.2010  #                 ~38 sentences (4.4K)
+  - news-crawl_news.2017 #          ~1,194,690 sentences (135M)
+  - news-crawl_news.2018 #          ~1,964,601 sentences (222M)
+  - news-crawl_news.2019 #          ~3,168,141 sentences (358M)
+  - news-crawl_news.2020 #          ~3,716,814 sentences (420M)
+  - news-crawl_news.2021 #          ~3,814,159 sentences (431M)
+  - news-crawl_news.2022 #          ~3,575,221 sentences (404M)
+  - news-crawl_news.2023 #          ~2,796,460 sentences (316M)
+
+  # The monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/configs/vi-en-spring-2024.yml b/configs/vi-en-spring-2024.yml
new file mode 100644
index 000000000..bd4202652
--- /dev/null
+++ b/configs/vi-en-spring-2024.yml
@@ -0,0 +1,111 @@
+# The initial configuration was generated using:
+# task config-generator -- vi en --name spring-2024
+#
+# The documentation for this config can be found here:
+# https://github.com/mozilla/firefox-translations-training/blob/c2a6e7f8c899ba363c5058e200692bfd8e321299/taskcluster/configs/config.prod.yml
+experiment:
+  name: spring-2024
+  src: vi
+  trg: en
+  best-model: chrf
+  use-opuscleaner: 'true'
+  opuscleaner-mode: defaults
+  bicleaner:
+    default-threshold: 0.5
+    dataset-thresholds: {}
+  mono-max-sentences-src: 500_000_000
+  mono-max-sentences-trg: 200_000_000
+  spm-sample-size: 10_000_000
+  spm-vocab-size: 32000
+  teacher-ensemble: 2
+  teacher-mode: two-stage
+  pretrained-models: {}
+datasets:
+  devtest:
+  - mtdata_Neulab-tedtalks_dev-1-eng-vie
+  - flores_aug-mix_dev
+  test:
+  - flores_devtest
+  - flores_aug-mix_devtest
+  - flores_aug-title_devtest
+  - flores_aug-upper_devtest
+  - flores_aug-typos_devtest
+  - flores_aug-noise_devtest
+  - flores_aug-inline-noise_devtest
+
+  # The training data contains:
+  #   69,085,316 sentences
+  #
+  # Skipped datasets:
+  #  - opus_CCMatrix/v1 - ignored datasets (50,092,444 sentences)
+  #  - opus_GNOME/v1 - not enough data  (149 sentences)
+  #  - opus_Ubuntu/v14.10 - not enough data  (0 sentences)
+  #  - opus_XLEnt/v1.2 - not enough data  (0 sentences)
+  #  - mtdata_ELRC-wikipedia_health-1-eng-vie - duplicate with opus
+  #  - mtdata_Facebook-wikimatrix-1-eng-vie - duplicate with opus
+  #  - mtdata_Neulab-tedtalks_train-1-eng-vie - duplicate with opus
+  #  - mtdata_Statmt-ccaligned-1-eng-vie_VN - duplicate with opus
+  train:
+  - opus_NLLB/v1  #                                       50,092,444 sentences
+  - opus_CCAligned/v1 #                                  12,394,417 sentences
+  - opus_OpenSubtitles/v2018 #                            3,505,276 sentences
+  - opus_WikiMatrix/v1 #                                  1,073,752 sentences
+  - opus_wikimedia/v20230407 #                              669,743 sentences
+  - opus_QED/v2.0a #                                        338,024 sentences
+  - opus_TED2020/v1 #                                       326,417 sentences
+  - opus_NeuLab-TedTalks/v1 #                               184,973 sentences
+  - opus_StanfordNLP-NMT/v1.0 #                             133,167 sentences
+  - opus_ELRC-wikipedia_health/v1 #                         126,413 sentences
+  - opus_bible-uedin/v1 #                                   124,390 sentences
+  - opus_Wikipedia/v1.0 #                                    58,116 sentences
+  - opus_KDE4/v2 #                                           42,782 sentences
+  - opus_Tatoeba/v2023-04-12 #                                6,855 sentences
+  - opus_ELRC-3086-wikipedia_health/v1 #                      4,274 sentences
+  - opus_ELRC_2922/v1 #                                       4,273 sentences
+  - mtdata_Neulab-tedtalks_test-1-eng-vie #             ~3,117,009 sentences (352.2 MB)
+
+  # The monolingual data contains:
+  #   ~0 sentences
+  mono-src: []
+
+  # The monolingual data contains:
+  #   ~195,823,002 sentences
+  mono-trg:
+  - news-crawl_news.2007  #          ~1,557,522 sentences (176M)
+  - news-crawl_news.2008 #          ~5,389,380 sentences (609M)
+  - news-crawl_news.2009 #          ~6,557,522 sentences (741M)
+  - news-crawl_news.2010 #          ~3,247,787 sentences (367M)
+  - news-crawl_news.2011 #          ~6,318,584 sentences (714M)
+  - news-crawl_news.2012 #          ~6,407,079 sentences (724M)
+  - news-crawl_news.2013 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2014 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2015 #         ~10,619,469 sentences (1.2G)
+  - news-crawl_news.2016 #          ~7,982,300 sentences (902M)
+  - news-crawl_news.2017 #         ~11,504,424 sentences (1.3G)
+  - news-crawl_news.2018 #          ~7,920,353 sentences (895M)
+  - news-crawl_news.2019 #         ~17,699,115 sentences (2.0G)
+  - news-crawl_news.2020 #         ~22,123,893 sentences (2.5G)
+  - news-crawl_news.2021 #         ~21,238,938 sentences (2.4G)
+  - news-crawl_news.2022 #         ~23,008,849 sentences (2.6G)
+  - news-crawl_news.2023 #         ~23,008,849 sentences (2.6G)
+marian-args:
+  decoding-backward:
+    beam-size: '12'
+    mini-batch-words: '2000'
+  decoding-teacher:
+    mini-batch-words: '4000'
+    precision: float16
+  training-backward:
+    early-stopping: '5'
+  training-teacher:
+    early-stopping: '20'
+  training-student:
+    early-stopping: '20'
+  training-student-finetuned:
+    early-stopping: '20'
+target-stage: all
+wandb-publication: true
+taskcluster:
+  split-chunks: 20
+  worker-classes:
+    default: gcp-spot
diff --git a/poetry.lock b/poetry.lock
index 9659d55d4..a05f55a29 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3811,4 +3811,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "9ef01d74291f3092911ace9f003f728c4f9d691a4e0fdcfe15ab05dd1fa5ad00"
+content-hash = "f4c1131b0c136675710a2997b3ef56fd010e3808b64bdc7ded3727db31ed5f6a"
diff --git a/pyproject.toml b/pyproject.toml
index 9961b2e21..4ecdf00b9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ websocket_client ="*"
 # This install group is for running tests. Note that any dependencies in the
 # pipeline are installed separately through the run_task test abstraction. This
 # list is only for things imported directly in the tests.
+ruamel-yaml = "^0.18.6"
 [tool.poetry.group.tests.dependencies]
 mtdata="0.3.2"
 requests="2.26.0"
diff --git a/taskcluster/configs/config.prod.yml b/taskcluster/configs/config.prod.yml
index 7cbc5bd70..4ca63159c 100644
--- a/taskcluster/configs/config.prod.yml
+++ b/taskcluster/configs/config.prod.yml
@@ -9,7 +9,7 @@
 # An "experiment" is an individual training run.
 experiment:
   # Provide an identifiable name for your experiment.
-  name: baseline_en_ru
+  name: baseline
 
   # The source and target languages. This is the language tag part of the
   # BCP 47 locale identifier.
diff --git a/tests/test_find_corpus.py b/tests/test_find_corpus.py
index f402c7e7a..e98a817d3 100644
--- a/tests/test_find_corpus.py
+++ b/tests/test_find_corpus.py
@@ -75,10 +75,6 @@ def test_opus(mock_opus_data, capsys):
         capsys,
         "The opus dataset outputs nicely.",
         """
-        Fetching datasets from:
-        https://opus.nlpl.eu/opusapi/?source=en&target=ca&preprocessing=moses&version=latest
-
-
         ┌──────────────────────────────┐
         │ OPUS - https://opus.nlpl.eu/ │
         └──────────────────────────────┘
diff --git a/utils/config_generator.py b/utils/config_generator.py
new file mode 100644
index 000000000..17f177e8f
--- /dev/null
+++ b/utils/config_generator.py
@@ -0,0 +1,433 @@
+"""
+Generate a training config for a language pair based on the latest production
+training config, taskcluster/configs/config.prod.yml.
+"""
+
+import argparse
+import re
+import subprocess
+import sys
+from io import StringIO
+from pathlib import Path
+
+import ruamel.yaml
+
+from utils.find_corpus import (
+    fetch_mtdata,
+    fetch_news_crawl,
+    fetch_opus,
+    fetch_sacrebleu,
+    get_remote_file_size,
+)
+
+root_dir = Path(__file__).parent.parent  # The parent of the directory containing this file.
+prod_config_path = root_dir / "taskcluster/configs/config.prod.yml"
+
+pretrained_student_models = {  # (source, target) -> URL used for the train-backwards model.
+    ("ru", "en"): "https://storage.googleapis.com/releng-translations-dev/models/ru-en/better-teacher/student"
+}  # fmt: skip
+
+skip_datasets = [  # Matched exactly against OPUS corpus, mtdata, and sacrebleu names.
+    # The NLLB dataset is based off of the CCMatrix dataset, and is mostly duplicated.
+    "CCMatrix",
+    # Skip Multi* datasets as they are generally multilingual versions of the original datasets.
+    "MultiMaCoCu",
+    "MultiHPLT",
+    # In Russian, the WikiTitles data had its direction reversed. The `LinguaTools-WikiTitles`
+    # version is fine.
+    "WikiTitles",
+]
+
+# Do not include small datasets. This works around #508, and minimizes dataset tasks that
+# won't bring a lot more data.
+minimum_dataset_sentences = 200
+
+flores_101_languages = {  # Both languages must be listed here for flores to be added.
+    "af", "amh", "ar", "as", "ast", "az", "be", "bn", "bs", "bg", "ca", "ceb", "cs", "ckb", "cy",
+    "da", "de", "el", "en", "et", "fa", "fi", "fr", "ful", "ga", "gl", "gu", "ha", "he", "hi",
+    "hr", "hu", "hy", "ig", "id", "is", "it", "jv", "ja", "kam", "kn", "ka", "kk", "kea", "km",
+    "ky", "ko", "lo", "lv", "ln", "lt", "lb", "lg", "luo", "ml", "mr", "mk", "mt", "mn", "mi",
+    "ms", "my", "nl", "nb", "npi", "nso", "ny", "oc", "om", "or", "pa", "pl", "pt", "pus", "ro",
+    "ru", "sk", "sl", "sna", "snd", "so", "es", "sr", "sv", "sw", "ta", "te", "tg", "tl", "th",
+    "tr", "uk", "umb", "ur", "uz", "vi", "wo", "xh", "yo", "zh", "zh", "zu"
+}  # fmt: skip
+
+
+def get_git_revision_hash(remote_branch: str) -> str:
+    """
+    Return the commit hash printed by `git merge-base <remote_branch> HEAD`, which is the best
+    common ancestor of the remote branch and the local changes. The intent is to reference a
+    commit that will always be around, even if the local prod config differs from the remote.
+    """
+    return (
+        subprocess.check_output(["git", "merge-base", remote_branch, "HEAD"])
+        .decode("ascii")
+        .strip()
+    )
+
+
+def update_config(
+    prod_config: any, name: str, source: str, target: str, fast: bool
+) -> dict[str, str]:
+    experiment = prod_config["experiment"]
+
+    # Update the prod config in place for this language pair.
+    experiment["name"] = name
+    experiment["src"] = source
+    experiment["trg"] = target
+    experiment["bicleaner"]["dataset-thresholds"] = {}
+
+    pretrained_model = pretrained_student_models.get((source, target))
+    if pretrained_model:
+        # Switch to the one stage teacher mode, as the higher quality backtranslations lead
+        # to issues with early stopping when switching between stages.
+        experiment["teacher-mode"] = "one-stage"
+        experiment["pretrained-models"]["train-backwards"]["urls"] = [pretrained_model]
+    else:
+        experiment["pretrained-models"] = {}
+
+    datasets = prod_config["datasets"]
+
+    # Clear out the dataset lists of the base config; each key must already exist.
+    datasets["train"].clear()
+    datasets["devtest"].clear()
+    datasets["test"].clear()
+    datasets["mono-src"].clear()
+    datasets["mono-trg"].clear()
+
+    # ruamel.yaml only supports inline comments. This dict maps a section key, such as
+    # "  train:", to the comment that string matching places at the top of that section.
+    comment_section = {}
+
+    add_train_data(source, target, datasets, comment_section, fast)
+    add_test_data(
+        source,
+        target,
+        datasets["test"],
+        datasets["devtest"],
+        comment_section,
+    )
+    add_mono_data(source, datasets["mono-src"], "  mono-src:", comment_section)
+    add_mono_data(target, datasets["mono-trg"], "  mono-trg:", comment_section)
+
+    return comment_section
+
+
+def add_train_data(
+    source: str, target: str, datasets: list[str], comment_section: dict[str, str], fast: bool
+):
+    """Fill the dataset lists from OPUS and mtdata, and record a summary comment for train."""
+    print("Fetching opus")
+    opus_datasets = fetch_opus(source, target)
+    total_sentences = 0
+    skipped_datasets = []
+    visited_corpora = set()
+
+    for dataset in opus_datasets:
+        sentences = dataset.alignment_pairs or 0
+        # Some datasets are ignored or too small to be included.
+        if dataset.corpus in skip_datasets:
+            skipped_datasets.append(
+                f"{dataset.corpus_key()} - ignored datasets ({sentences:,} sentences)"
+            )
+            continue
+        if sentences < minimum_dataset_sentences:
+            skipped_datasets.append(
+                f"{dataset.corpus_key()} - not enough data  ({sentences:,} sentences)"
+            )
+            continue
+
+        visited_corpora.add(normalize_corpus_name(dataset.corpus))
+        total_sentences += sentences
+        corpus_key = dataset.corpus_key()
+        datasets["train"].append(corpus_key)
+        datasets["train"].yaml_add_eol_comment(
+            f"{sentences:,} sentences".rjust(70 - len(corpus_key), " "),
+            len(datasets["train"]) - 1,
+        )
+
+    print("Fetching mtdata")
+    entries = fetch_mtdata(source, target)
+
+    for corpus_key, entry in entries.items():
+        # mtdata can have test and devtest data as well. Each entry goes to exactly one list;
+        # only training entries are deduplicated against OPUS and checked against skip_datasets.
+        if entry.did.name.endswith("test"):
+            dataset = datasets["test"]
+        elif entry.did.name.endswith("dev"):
+            dataset = datasets["devtest"]
+        else:
+            dataset = datasets["train"]
+            corpus_name = normalize_corpus_name(entry.did.name)
+            group_corpus_name = normalize_corpus_name(entry.did.group + entry.did.name)
+            if corpus_name in visited_corpora or group_corpus_name in visited_corpora:
+                skipped_datasets.append(f"{corpus_key} - duplicate with opus")
+                continue
+
+            if entry.did.name in skip_datasets:
+                skipped_datasets.append(f"{entry.did.name} - ignored datasets")
+                continue
+
+        dataset.append(corpus_key)
+        if not fast:
+            byte_size, display_size = get_remote_file_size(entry.url)
+            if byte_size:
+                # Don't add the sentences to the total, as these will be commented out by default.
+                sentences = estimate_sentence_size(byte_size)
+                dataset.yaml_add_eol_comment(
+                    f"~{sentences:,} sentences ".rjust(70 - len(corpus_key), " ")
+                    + f"({display_size})",
+                    len(dataset) - 1,
+                )
+
+    comments = [
+        "The training data contains:",
+        f"  {total_sentences:,} sentences",
+    ]
+    if skipped_datasets:
+        comments.append("")
+        comments.append("Skipped datasets:")
+        for d in skipped_datasets:
+            comments.append(f" - {d}")
+
+    comment_section["  train:"] = "\n".join(comments)
+
+
+def normalize_corpus_name(corpus_name: str):
+    """Normalize the corpus name so that it's easy to deduplicate between opus and mtdata."""
+
+    # Remove the language tags at the end.
+    # mtdata_ELRC-vnk.fi-1-eng-fin
+    #                     ^^^^^^^^
+    corpus_name = re.sub(r"-\w{3}-\w{3}$", "", corpus_name)
+
+    corpus_name = corpus_name.lower()
+
+    # Remove numbers and anything else that is not a letter. This is a little aggressive, but
+    # should help deduplicate more datasets. For example:
+    #   opus: 725-Hallituskausi_2011_2
+    #   mtdata: hallituskausi_2011_2015-1-eng-fin
+    corpus_name = re.sub(r"[^a-z]", "", corpus_name)
+
+    # Datasets could be split by train/test/dev. Remove the "train" word so that it will match
+    # between Opus and mtdata.
+    #   opus: NeuLab-TedTalks/v1
+    #   mtdata: Neulab-tedtalks_train-1-eng-fin
+    #   mtdata: Neulab-tedtalks_test-1-eng-fin
+    #   mtdata: Neulab-tedtalks_dev-1-eng-fin
+    corpus_name = re.sub(r"train$", "", corpus_name)
+
+    return corpus_name
+
+
+def add_test_data(
+    source: str,
+    target: str,
+    test_datasets: list[str],
+    devtest_datasets: list[str],
+    comment_section: dict[str, str],
+) -> None:
+    skipped_datasets = []
+    print("Fetching flores")
+    if source in flores_101_languages and target in flores_101_languages:
+        test_datasets.append("flores_devtest")
+
+        # Add the augmented flores variants to check performance for those specific cases.
+        test_datasets.append("flores_aug-mix_devtest")
+        test_datasets.append("flores_aug-title_devtest")
+        test_datasets.append("flores_aug-upper_devtest")
+        test_datasets.append("flores_aug-typos_devtest")
+        test_datasets.append("flores_aug-noise_devtest")
+        test_datasets.append("flores_aug-inline-noise_devtest")
+
+        devtest_datasets.append("flores_aug-mix_dev")
+
+    is_test = True  # Alternate the kept sacrebleu datasets between test and devtest.
+    print("Fetching sacrebleu")
+    for d in fetch_sacrebleu(source, target):
+        # Work around: PLW2901 `for` loop variable `dataset_name` overwritten by assignment target
+        dataset_name = d
+        if dataset_name in skip_datasets:
+            # This could be a dataset with a variant design.
+            skipped_datasets.append(f"{dataset_name} - variant dataset")
+        else:
+            dataset_name = dataset_name.replace("sacrebleu_", "")  # Avoids a doubled prefix.
+            if is_test:
+                test_datasets.append(f"sacrebleu_{dataset_name}")
+            else:
+                devtest_datasets.append(f"sacrebleu_aug-mix_{dataset_name}")
+            is_test = not is_test
+
+    if skipped_datasets:
+        test_comment = "\n".join(
+            [
+                "Skipped test/devtest datasets:",
+                *[f" - {d}" for d in skipped_datasets],
+            ]
+        )
+
+        comment_section["  devtest:"] = test_comment  # Only recorded when something was skipped.
+
+
+def estimate_sentence_size(bytes: int) -> int:
+    """Estimate the sentence count from a compressed byte size, rounding down."""
+    # One dataset measured 113 bytes per sentence, use that as a rough estimate.
+    bytes_per_sentence = 113
+    return bytes // bytes_per_sentence
+
+
+def add_mono_data(
+    lang: str,
+    datasets: list[str],
+    comment_key: str,
+    comment_section: dict[str, str],
+) -> None:
+    print("Fetching newscrawl for", lang)
+    sentence_count = 0  # Sum of the estimates over every dataset with a known size.
+    for dataset in fetch_news_crawl(lang):
+        datasets.append(dataset.name)
+        if dataset.size:
+            sentences = estimate_sentence_size(dataset.size)
+            sentence_count += sentences
+            datasets.yaml_add_eol_comment(
+                f"~{sentences:,} sentences ".rjust(50 - len(dataset.name), " ")
+                + f"({dataset.display_size})",
+                len(datasets) - 1,  # Index of the entry that was just appended.
+            )
+
+    comment = "\n".join(
+        [
+            "The monolingual data contains:",
+            f"  ~{sentence_count:,} sentences",
+        ]
+    )
+
+    comment_section[comment_key] = comment  # Stored under the caller-provided section key.
+
+
+def strip_comments(yaml_text: str) -> str:
+    """
+    Return yaml_text without comment-only lines, simple trailing comments, and blank lines.
+    ruamel.yaml preserves comments when loading, so they are stripped from the text first.
+    """
+    result = ""
+    for raw_line in yaml_text.splitlines():
+        # Skip lines that contain nothing but a comment.
+        if raw_line.strip().startswith("#"):
+            continue
+
+        # Remove a trailing comment. Only comments made up of whitespace, word characters,
+        # dashes, and dots are matched; anything else is left in place.
+        line = re.sub(r"#[\s\w\-.]*$", "", raw_line)
+
+        # Don't add any empty lines.
+        if line.strip():
+            result += line.rstrip() + "\n"
+
+    return result
+
+
+def apply_comments_to_yaml_string(yaml, prod_config, comment_section, remote_branch: str) -> str:
+    """
+    Dump prod_config with the given ruamel.yaml instance, insert the section comments via
+    string manipulation, and prepend a header with the CLI arguments and a documentation link.
+    """
+    # ruamel.yaml only supports inline comments, so dump to a string that can be manipulated.
+    output_stream = StringIO()
+    yaml.dump(prod_config, output_stream)
+    yaml_string: str = output_stream.getvalue()
+    yaml_string = apply_comment_section(comment_section, yaml_string)
+
+    script_args = " ".join(sys.argv[1:])
+    return "\n".join(
+        [
+            "# The initial configuration was generated using:",
+            f"# task config-generator -- {script_args}",
+            "#",
+            "# The documentation for this config can be found here:",
+            f"# https://github.com/mozilla/firefox-translations-training/blob/{get_git_revision_hash(remote_branch)}/taskcluster/configs/config.prod.yml",
+            yaml_string,
+        ]
+    )
+
+
+def apply_comment_section(comment_section: dict[str, str], yaml_string: str) -> str:
+    for key, raw_comment in comment_section.items():
+        # Capture the key's leading whitespace so that the comment lines up with the key.
+        match = re.search(r"^(?P<indent>\s*)", key)
+        if not match:
+            raise Exception("Could not find regex match")
+        indent = match.group("indent")
+
+        # Prefix each comment line with the indent and "# ".
+        comment = "\n".join([f"{indent}# {line}" for line in raw_comment.splitlines()])
+
+        yaml_string = yaml_string.replace(f"\n{key}", f"\n\n{comment}\n{key}")
+    return yaml_string
+
+
+def main() -> None:
+    """Parse the CLI arguments and write configs/{source}-{target}-{name}.yml."""
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        # Preserves whitespace in the help text.
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+
+    parser.add_argument("source", metavar="SOURCE", type=str, help="The source language tag")
+    parser.add_argument("target", metavar="TARGET", type=str, help="The target language tag")
+    parser.add_argument(
+        "--name",
+        metavar="name",
+        type=str,
+        required=True,
+        help="The name of the config, which gets constructed like so: configs/{source}-{target}-{name}.yml",
+    )
+    parser.add_argument(
+        "--remote_branch",
+        metavar="REF",
+        type=str,
+        default="origin/main",
+        help="The remote branch that contains the config.prod.yml. Typically origin/main, or origin/release",
+    )
+    parser.add_argument(
+        "--fast",
+        action="store_true",
+        help="Skip slow network requests like looking up dataset size",
+    )
+
+    args = parser.parse_args()
+
+    # Validate the inputs. sys.exit(message) prints the message to stderr and exits with
+    # status 1, so an invalid language tag aborts the run instead of only printing a
+    # warning and then writing a config for a bad language pair.
+    langtag_re = r"[a-z]{2,3}"
+    if not re.fullmatch(langtag_re, args.source):
+        sys.exit("The source language should be a 2 or 3 letter lang tag.")
+    if not re.fullmatch(langtag_re, args.target):
+        sys.exit("The target language should be a 2 or 3 letter lang tag.")
+    if not re.fullmatch(r"[\w\d-]+", args.name):
+        sys.exit(
+            "The name of the training config should only contain alphanumeric, underscores, and dashes."
+        )
+
+    # ruamel.yaml preserves comments and ordering unlike PyYAML
+    yaml = ruamel.yaml.YAML()
+
+    # Load the prod yaml. Its comments are stripped out of the text before it is parsed,
+    # as ruamel.yaml would otherwise carry them over.
+    yaml_string = strip_comments(prod_config_path.read_text())
+    prod_config = yaml.load(StringIO(yaml_string))
+
+    comment_section = update_config(prod_config, args.name, args.source, args.target, args.fast)
+    final_config = apply_comments_to_yaml_string(
+        yaml, prod_config, comment_section, args.remote_branch
+    )
+    final_config_path = root_dir / "configs" / f"{args.source}-{args.target}-{args.name}.yml"
+
+    print("Writing config to:", str(final_config_path))
+    final_config_path.write_text(final_config)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/utils/find_corpus.py b/utils/find_corpus.py
index 47f4484e9..2629e3a05 100755
--- a/utils/find_corpus.py
+++ b/utils/find_corpus.py
@@ -9,6 +9,7 @@
 
 import argparse
 import logging
+import re
 import sys
 from typing import NamedTuple, Optional, TypeVar, Union
 
@@ -42,7 +43,7 @@ class OpusDataset(NamedTuple):
 
     latest: Union["True", "False"]
 
-    def name(self) -> str:
+    def corpus_key(self) -> str:
         return f"opus_{self.corpus}/{self.version}"
 
     def website_url(self) -> str:
@@ -52,23 +53,25 @@ def humanize_size(self) -> str:
         return humanize.naturalsize(self.size * 1024)
 
 
-def get_opus(source: str, target: str, download_url: bool):
+def fetch_opus(source: str, target: str) -> list[OpusDataset]:
     # This API is documented: https://opus.nlpl.eu/opusapi/
     url = f"https://opus.nlpl.eu/opusapi/?source={source}&target={target}&preprocessing=moses&version=latest"
 
-    print(f"Fetching datasets from:\n{url}\n")
-
     datasets = requests.get(url).json()
 
     # Convert the response into a typed object that is sorted.
     datasets_typed = [OpusDataset(**corpus_data) for corpus_data in datasets.get("corpora", [])]
-    datasets_typed = sorted(datasets_typed, key=lambda x: x.alignment_pairs or 0, reverse=True)
+    return sorted(datasets_typed, key=lambda x: x.alignment_pairs or 0, reverse=True)
+
 
+def get_opus(source: str, target: str, download_url: bool):
     print("")
     print("┌──────────────────────────────┐")
     print("│ OPUS - https://opus.nlpl.eu/ │")
     print("└──────────────────────────────┘")
 
+    datasets = fetch_opus(source, target)
+
     print_table(
         [
             [
@@ -81,31 +84,33 @@ def get_opus(source: str, target: str, download_url: bool):
             *[
                 [
                     dataset.corpus,
-                    dataset.name(),
+                    dataset.corpus_key(),
                     dataset.alignment_pairs,
                     dataset.humanize_size(),
                     dataset.url if download_url else dataset.website_url(),
                 ]
-                for dataset in datasets_typed
+                for dataset in datasets
                 if dataset.alignment_pairs
             ],
         ]
     )
 
-    names = [f'opus_{d["corpus"]}/{d["version"]}' for d in datasets["corpora"]]
+    names = [dataset.corpus_key() for dataset in datasets]
     print_yaml(names, exclude=["OPUS100v", "WMT-News"])
 
 
-def get_sacrebleu(source: str, target: str):
+def fetch_sacrebleu(source: str, target: str) -> dict[str, dict[str, any]]:
     import sacrebleu
 
-    entries = [
-        (name, entry)
+    return {
+        name: entry
         for name, entry in sacrebleu.DATASETS.items()
         if f"{source}-{target}" in entry or f"{target}-{source}" in entry
-    ]
+    }
 
-    names = [f"sacrebleu_{name}" for name, entry in entries]
+
+def get_sacrebleu(source: str, target: str):
+    datasets_dict = fetch_sacrebleu(source, target)
 
     print("")
     print("┌─────────────────────────────────────────────────┐")
@@ -118,14 +123,14 @@ def get_sacrebleu(source: str, target: str):
                 [
                     #
                     name,
-                    entry["description"],
-                    ", ".join(entry["data"]),
+                    dataset["description"],
+                    ", ".join(dataset["data"]),
                 ]
-                for name, entry in entries
+                for name, dataset in datasets_dict.items()
             ],
         ]
     )
-    print_yaml(names)
+    print_yaml(list(f"sacrebleu_{name}" for name in datasets_dict.keys()))
 
 
 def get_size(tags: list[str]) -> str:
@@ -305,41 +310,33 @@ def get_huggingface_any(language: str):
     )
 
 
-def get_remote_file_size(url: str, display_not_200: bool = True) -> Optional[int]:
+def get_remote_file_size(
+    url: str, display_not_200: bool = True
+) -> tuple[Optional[int], Optional[str]]:
     try:
-        response = requests.head(url, timeout=1)
+        response = requests.head(url, timeout=1, allow_redirects=True)
 
         if response.status_code == 200:
-            return humanize.naturalsize(int(response.headers.get("Content-Length", 0)))
+            int_size = int(response.headers.get("Content-Length", 0))
+            return int_size, humanize.naturalsize(int_size)
         else:
             if display_not_200:
                 print(f"Failed to retrieve file information. Status code: {response.status_code}")
-            return None
+            return None, None
     except requests.exceptions.RequestException as e:
         print(f"An error occurred: {e}")
-        return None
+        return None, None
 
 
 T = TypeVar("T")
 
-
-def exclude_by_name(excludes: list[str], names: list[str], entries: list[T]) -> list[T]:
-    """Exclude entries by an excludes list, and a name list."""
-    filtered_entries = []
-    for name, entry in zip(names, entries):
-        filter = False
-        for exclude in excludes:
-            if exclude.lower() in name.lower():
-                filter = True
-                break
-
-        if not filter:
-            filtered_entries.append(entry)
-
-    return filtered_entries
+from mtdata.entry import Entry
 
 
-def get_mtdata(source: str, target: str):
+def fetch_mtdata(source: str, target: str) -> dict[str, Entry]:
+    """
+    Returns a dict that maps the corpus key to the mtdata entry.
+    """
     # mtdata outputs debug logs
     logging.disable(logging.CRITICAL)
 
@@ -353,14 +350,30 @@ def get_mtdata(source: str, target: str):
         get_entries(lang_pair(source_tricode + "-" + target_tricode), None, None, True),
         key=lambda entry: entry.did.group,
     )
-    excludes = ["opus", "newstest", "UNv1"]
 
-    def get_name(entry):
+    def get_corpus_key(entry):
         return (
             f"mtdata_{entry.did.group}-{entry.did.name}-{entry.did.version}-{entry.did.lang_str}"
         )
 
-    names = [get_name(entry) for entry in entries]
+    entries = {get_corpus_key(entry): entry for entry in entries}
+
+    excludes = ["opus", "newstest", "unv1"]  # lowercase excludes.
+
+    def is_excluded(corpus_key: str) -> bool:
+        for exclude in excludes:
+            if exclude in corpus_key.lower():
+                return True
+        return False
+
+    # Filter out the excluded entries.
+    return {
+        corpus_key: entry for corpus_key, entry in entries.items() if not is_excluded(corpus_key)
+    }
+
+
+def get_mtdata(source: str, target: str):
+    entries = fetch_mtdata(source, target)
 
     print("")
     print("┌────────────────────────────────────────────────┐")
@@ -376,32 +389,85 @@ def get_name(entry):
             *[
                 [
                     #
-                    get_name(entry),
+                    corpus_key,
                     entry.url,
                     # get_remote_file_size(entry.url),
                 ]
-                for entry in
+                for corpus_key, entry in entries.items()
                 # Filter out the excludes
-                exclude_by_name(excludes, names, entries)
             ],
         ]
     )
 
-    print_yaml(names, exclude=excludes)
+    print_yaml(entries.keys())
+
+
+class NewsCrawlDataset(NamedTuple):
+    name: str  # Dataset key, e.g. "news-crawl_news.2013".
+    url: str  # Download URL of the shuffled and deduplicated .gz file.
+    size: Optional[int]  # Approximate size in bytes, computed from the listed size.
+    display_size: Optional[str]  # The size as listed, number plus unit, e.g. "1.2G".
+
+
+def fetch_news_crawl(lang: str) -> list[NewsCrawlDataset]:
+    """
+    List the news-crawl datasets for a language by scraping the statmt.org directory listing.
+    Returns an empty list when the request fails or when no file names match.
+    """
+    base_url = f"https://data.statmt.org/news-crawl/{lang}/"
+    response = requests.get(base_url, allow_redirects=True)
+
+    datasets = []
+    if response.ok:
+        # Example row: (indentation and newlines added)
+        # <tr>
+        #     <td valign="top"><img src="/icons/compressed.gif" alt="[   ]"></td>
+        #     <td><a href="news.2013.en.shuffled.deduped.gz">news.2013.en.shuffled.deduped.gz</a></td>
+        #     <td align="right">2019-01-14 10:23  </td>
+        #     <td align="right">1.2G</td>
+        #     <td>&nbsp;</td>
+        # </tr>
+
+        regex = re.compile(
+            r"""
+            # Match the file name year.
+            # >news.2008.en.shuffled.deduped.gz<
+            #       ^^^^
+            >news.(\d+)\.\w+\.shuffled\.deduped\.gz<
+            [^\n]*
+
+            # Match the file size and unit.
+            # <td align="right">176M</td>
+            #                   ^^^^
+            <td\ align="right">
+                ([\d\.]+)(\w+)
+            </td>
+        """,
+            re.VERBOSE,
+        )
+
+        matches = re.findall(regex, response.text)
+
+        if matches:
+            for year, size_number, size_unit in matches:
+                # Unknown units yield size=None rather than an unbound or stale multiplier.
+                multiplier = {"K": 1_000, "M": 1_000_000, "G": 1_000_000_000}.get(size_unit)
+
+                name = f"news-crawl_news.{year}"
+                url = f"https://data.statmt.org/news-crawl/{lang}/news.{year}.{lang}.shuffled.deduped.gz"
+                size = int(float(size_number) * multiplier) if multiplier else None
+
+                datasets.append(NewsCrawlDataset(name, url, size, f"{size_number}{size_unit}"))
+        else:
+            print("The regex could not find newscrawl datasets for", lang)
+    else:
+        print("No newscrawl data was available for", lang)
+    return datasets
 
 
 def get_news_crawl(source: str, target: str):
     for lang in (source, target):
-        datasets = []
-        for i in range(20):
-            year = 2007 + i
-            name = f"news-crawl_news.{year}"
-            url = (
-                f"https://data.statmt.org/news-crawl/{lang}/news.{year}.{lang}.shuffled.deduped.gz"
-            )
-            size = get_remote_file_size(url, display_not_200=False)
-            if size is not None:
-                datasets.append((name, url, size))
+        datasets = fetch_news_crawl(lang)
 
         print("")
         print("┌─────────────────────────────────────────────────────────────────────┐")
@@ -414,11 +480,11 @@ def get_news_crawl(source: str, target: str):
                     "URL",
                     "Size",
                 ],
-                *[[name, url, size] for name, url, size in datasets],
+                *[[name, url, display_size] for name, url, _, display_size in datasets],
             ]
         )
 
-        print_yaml([name for name, _, _ in datasets])
+        print_yaml([name for name, _, _, _ in datasets])
 
 
 def print_yaml(names: list[str], exclude: list[str] = []):