Skip to content
This repository has been archived by the owner on Feb 21, 2024. It is now read-only.

Commit

Permalink
Push non-organism files to data/raw
Browse files Browse the repository at this point in the history
  • Loading branch information
bsantan committed Feb 21, 2024
1 parent c2883f3 commit 12ec00c
Showing 1 changed file with 3 additions and 9 deletions.
12 changes: 3 additions & 9 deletions src/uniprot2s3/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def run_proteome_api(show_status: bool) -> set:
requests_cache.install_cache("uniprot_cache")

# Ensure the directory for storing Uniprot files exists
- Path(UNIPROT_S3_DIR).mkdir(parents=True, exist_ok=True)
+ Path(RAW_DATA_DIR).mkdir(parents=True, exist_ok=True)

organism_ids_set = fetch_uniprot_reference_proteome_data()

Expand Down Expand Up @@ -182,7 +182,7 @@ def fetch_uniprot_data(organism_id):

def fetch_uniprot_reference_proteome_data() -> set:
"""Single URL request for Uniprot proteome data."""
- file_path = Path(UNIPROT_S3_DIR) / f"{PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}"
+ file_path = Path(RAW_DATA_DIR) / f"{PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}"
all_proteomes_query = "%28*%29"

url = construct_query_url(
Expand Down Expand Up @@ -236,9 +236,6 @@ def run_uniprot_api(taxa_id_from_proteomes_set, show_status: bool) -> None:
# Cache HTTP requests to avoid repeated calls
requests_cache.install_cache("uniprot_cache")

- # Ensure the directory for storing Uniprot files exists
- # Path(UNIPROT_S3_DIR).mkdir(parents=True, exist_ok=True)
-
organism_list = get_organism_list()

taxa_id_common_with_proteomes_list = list(set(organism_list).intersection(taxa_id_from_proteomes_set))
Expand Down Expand Up @@ -271,15 +268,12 @@ def run_uniprot_api_parallel(taxa_id_from_proteomes_set, show_status: bool, work
# Cache HTTP requests to avoid repeated calls
requests_cache.install_cache("uniprot_cache")

- # Ensure the directory for storing Uniprot files exists
- # Path(UNIPROT_S3_DIR).mkdir(parents=True, exist_ok=True)
-
organism_list = get_organism_list()

taxa_id_common_with_proteomes_list = list(set(organism_list).intersection(taxa_id_from_proteomes_set))

# Write used IDs to file
- file_path = Path(UNIPROT_S3_DIR) / f"{KGMICROBE_PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}"
+ file_path = Path(RAW_DATA_DIR) / f"{KGMICROBE_PROTEOMES_FILENAME}.{UNIPROT_DESIRED_FORMAT}"
with open(file_path, "w") as f:
for line in taxa_id_common_with_proteomes_list:
f.write(f"{line}\n")
Expand Down

0 comments on commit 12ec00c

Please sign in to comment.