From 12ec00caee7809a4f58f067423fa16a69ed12c4d Mon Sep 17 00:00:00 2001
From: bsantan <70932395+bsantan@users.noreply.github.com>
Date: Wed, 21 Feb 2024 12:48:57 -0700
Subject: [PATCH] Push non-organism files to data/raw

---
 src/uniprot2s3/main.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/src/uniprot2s3/main.py b/src/uniprot2s3/main.py
index 15c7c3f..9975fe2 100644
--- a/src/uniprot2s3/main.py
+++ b/src/uniprot2s3/main.py
@@ -116,7 +116,7 @@ def run_proteome_api(show_status: bool) -> set:
     requests_cache.install_cache("uniprot_cache")
 
     # Ensure the directory for storing Uniprot files exists
-    Path(UNIPROT_S3_DIR).mkdir(parents=True, exist_ok=True)
+    Path(RAW_DATA_DIR).mkdir(parents=True, exist_ok=True)
 
     organism_ids_set = fetch_uniprot_reference_proteome_data()
 
@@ -182,7 +182,7 @@ def fetch_uniprot_data(organism_id):
 
 def fetch_uniprot_reference_proteome_data() -> set:
     """Single URL request for Uniprot proteome data."""
-    file_path = Path(UNIPROT_S3_DIR) / f"{PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}"
+    file_path = Path(RAW_DATA_DIR) / f"{PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}"
     all_proteomes_query = "%28*%29"
 
     url = construct_query_url(
@@ -236,9 +236,6 @@ def run_uniprot_api(taxa_id_from_proteomes_set, show_status: bool) -> None:
     # Cache HTTP requests to avoid repeated calls
     requests_cache.install_cache("uniprot_cache")
 
-    # Ensure the directory for storing Uniprot files exists
-    # Path(UNIPROT_S3_DIR).mkdir(parents=True, exist_ok=True)
-
     organism_list = get_organism_list()
 
     taxa_id_common_with_proteomes_list = list(set(organism_list).intersection(taxa_id_from_proteomes_set))
@@ -271,15 +268,12 @@ def run_uniprot_api_parallel(taxa_id_from_proteomes_set, show_status: bool, work
     # Cache HTTP requests to avoid repeated calls
     requests_cache.install_cache("uniprot_cache")
 
-    # Ensure the directory for storing Uniprot files exists
-    # Path(UNIPROT_S3_DIR).mkdir(parents=True, exist_ok=True)
-
     organism_list = get_organism_list()
 
     taxa_id_common_with_proteomes_list = list(set(organism_list).intersection(taxa_id_from_proteomes_set))
 
     # Write used IDs to file
-    file_path = Path(UNIPROT_S3_DIR) / f"{KGMICROBE_PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}"
+    file_path = Path(RAW_DATA_DIR) / f"{KGMICROBE_PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}"
     with open(file_path, "w") as f:
         for line in taxa_id_common_with_proteomes_list:
             f.write(f"{line}\n")