From 12ec00caee7809a4f58f067423fa16a69ed12c4d Mon Sep 17 00:00:00 2001 From: bsantan <70932395+bsantan@users.noreply.github.com> Date: Wed, 21 Feb 2024 12:48:57 -0700 Subject: [PATCH] Push non-organism files to data/raw --- src/uniprot2s3/main.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/uniprot2s3/main.py b/src/uniprot2s3/main.py index 15c7c3f..9975fe2 100644 --- a/src/uniprot2s3/main.py +++ b/src/uniprot2s3/main.py @@ -116,7 +116,7 @@ def run_proteome_api(show_status: bool) -> set: requests_cache.install_cache("uniprot_cache") # Ensure the directory for storing Uniprot files exists - Path(UNIPROT_S3_DIR).mkdir(parents=True, exist_ok=True) + Path(RAW_DATA_DIR).mkdir(parents=True, exist_ok=True) organism_ids_set = fetch_uniprot_reference_proteome_data() @@ -182,7 +182,7 @@ def fetch_uniprot_data(organism_id): def fetch_uniprot_reference_proteome_data() -> set: """Single URL request for Uniprot proteome data.""" - file_path = Path(UNIPROT_S3_DIR) / f"{PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}" + file_path = Path(RAW_DATA_DIR) / f"{PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}" all_proteomes_query = "%28*%29" url = construct_query_url( @@ -236,9 +236,6 @@ def run_uniprot_api(taxa_id_from_proteomes_set, show_status: bool) -> None: # Cache HTTP requests to avoid repeated calls requests_cache.install_cache("uniprot_cache") - # Ensure the directory for storing Uniprot files exists - # Path(UNIPROT_S3_DIR).mkdir(parents=True, exist_ok=True) - organism_list = get_organism_list() taxa_id_common_with_proteomes_list = list(set(organism_list).intersection(taxa_id_from_proteomes_set)) @@ -271,15 +268,12 @@ def run_uniprot_api_parallel(taxa_id_from_proteomes_set, show_status: bool, work # Cache HTTP requests to avoid repeated calls requests_cache.install_cache("uniprot_cache") - # Ensure the directory for storing Uniprot files exists - # Path(UNIPROT_S3_DIR).mkdir(parents=True, exist_ok=True) - organism_list = get_organism_list() taxa_id_common_with_proteomes_list = list(set(organism_list).intersection(taxa_id_from_proteomes_set)) # Write used IDs to file - file_path = Path(UNIPROT_S3_DIR) / f"{KGMICROBE_PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}" + file_path = Path(RAW_DATA_DIR) / f"{KGMICROBE_PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}" with open(file_path, "w") as f: for line in taxa_id_common_with_proteomes_list: f.write(f"{line}\n")