Skip to content

Commit

Permalink
fix: remove duplication for downloading data and test data
Browse files Browse the repository at this point in the history
  • Loading branch information
vschaffn committed Jan 15, 2025
1 parent f0a24a7 commit 951e2a9
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 80 deletions.
57 changes: 3 additions & 54 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,69 +1,18 @@
import os
import tarfile
import tempfile
import urllib
from distutils.dir_util import copy_tree
from typing import Callable

import pytest

# Absolute path of the "tests/test_data" directory, resolved relative to the location of this file.
_TESTDATA_DIRECTORY = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "tests", "test_data"))

# URL of the tarball of the xdem-data repository (branch "2-richdem_gdal") that contains the test data.
_TESTDATA_REPO_URL = "https://github.com/vschaffn/xdem-data/tarball/2-richdem_gdal"
# Commit hash associated with the test data.
# NOTE(review): if this hash is only ever appended to the URL as a "#commit=" fragment, it is not sent to the
# server, and the archive received is presumably the current tip of the branch -- confirm whether real pinning
# is intended.
_COMMIT_HASH = "31a7159c982cec4b352f0de82bd4e0be61db3afe"


from xdem.examples import download_and_extract_tarball


def download_test_data(overwrite: bool = False) -> None:
    """
    Download the entire test_data directory from the xdem-data repository.

    The work is delegated to :func:`xdem.examples.download_and_extract_tarball`, so that the test data and the
    example data are fetched by a single implementation instead of two copies of the same code.

    :param overwrite: If True, re-downloads the data even if it already exists. If False, nothing is
        downloaded when the test data directory already exists and is not empty.
    """
    # "test_data" is the directory of the archive that holds the test files.
    download_and_extract_tarball(dir="test_data", target_dir=_TESTDATA_DIRECTORY, overwrite=overwrite)
_TESTDATA_DIRECTORY = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "tests", "test_data"))


@pytest.fixture(scope="session") # type: ignore
def get_test_data_path() -> Callable[[str], str]:
def _get_test_data_path(filename: str, overwrite: bool = False) -> str:
"""Get file from test_data"""
download_test_data(overwrite=overwrite) # Ensure the test data is downloaded
download_and_extract_tarball(dir="test_data", target_dir=_TESTDATA_DIRECTORY, overwrite=overwrite)
file_path = os.path.join(_TESTDATA_DIRECTORY, filename)

if not os.path.exists(file_path):
Expand Down
63 changes: 37 additions & 26 deletions xdem/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

"""Utility functions to download and find example data."""
import os
import shutil
import tarfile
import tempfile
import urllib.request
Expand All @@ -27,6 +28,10 @@

import xdem

# URL of the tarball of the xdem-data repository (branch "2-richdem_gdal") holding the example and test data.
_DATA_REPO_URL = "https://github.com/vschaffn/xdem-data/tarball/2-richdem_gdal"
# Commit hash, appended to the URL as a "#commit=" fragment when the tarball is downloaded.
# NOTE(review): a URL fragment is not sent to the server, so the archive received is presumably the current
# tip of the branch rather than this exact commit -- confirm whether real pinning is intended.
_COMMIT_HASH = "31a7159c982cec4b352f0de82bd4e0be61db3afe"


_EXAMPLES_DIRECTORY = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "examples", "data"))
# Absolute filepaths to the example files.
_FILEPATHS_DATA = {
Expand All @@ -48,55 +53,61 @@
available = list(_FILEPATHS_DATA.keys()) + list(_FILEPATHS_PROCESSED.keys())


def download_and_extract_tarball(dir: str, target_dir: str, overwrite: bool = False) -> None:
    """
    Download the data repository tarball and copy one of its directories into a local directory.

    The archive is downloaded and unpacked inside a temporary directory, which is removed before the function
    returns (also when an error occurs).

    :param dir: Directory to import, given relative to the root of the archive (e.g. "data/Longyearbyen").
    :param target_dir: Local directory that receives the content of ``dir``.
    :param overwrite: If True, delete ``target_dir`` and download the data again. If False, do nothing when
        ``target_dir`` already exists and is not empty.

    :raises ValueError: If the server does not answer with status 200, if the archive has no top-level
        directory, or if ``dir`` does not exist in the archive.
    """
    # Nothing to do if the data is already there and no refresh was requested
    if not overwrite and os.path.exists(target_dir) and os.listdir(target_dir):
        return

    if overwrite and os.path.exists(target_dir):
        # Clear existing files
        shutil.rmtree(target_dir)

    # Construct the URL with the commit hash
    url = f"{_DATA_REPO_URL}#commit={_COMMIT_HASH}"

    # The context manager guarantees that the temporary directory is removed, even if a step below fails
    with tempfile.TemporaryDirectory() as temp_dir:
        tar_path = os.path.join(temp_dir, "data.tar.gz")

        # Download the tarball, streaming it to disk instead of holding it entirely in memory
        with urllib.request.urlopen(url) as response:
            status = response.getcode()
            if status != 200:
                # urllib responses expose the code through getcode()/status; they have no "status_code"
                raise ValueError(f"Failed to download data: {status}")
            with open(tar_path, "wb") as outfile:
                shutil.copyfileobj(response, outfile)

        # Extract the tarball, refusing members that would be written outside of temp_dir when the running
        # Python version provides extraction filters
        with tarfile.open(tar_path) as tar:
            if hasattr(tarfile, "data_filter"):
                tar.extractall(temp_dir, filter="data")
            else:
                tar.extractall(temp_dir)

        # Find the first directory inside the extracted tarball (there should be only one)
        top_level_dirs = [name for name in os.listdir(temp_dir) if os.path.isdir(os.path.join(temp_dir, name))]
        if not top_level_dirs:
            raise ValueError("The downloaded archive does not contain any directory.")

        extracted_dir = os.path.join(temp_dir, top_level_dirs[0], dir)
        if not os.path.isdir(extracted_dir):
            raise ValueError(f"Directory '{dir}' was not found in the downloaded archive.")

        # Copy the extracted data to the target directory. shutil.copytree is used instead of distutils'
        # copy_tree: distutils is gone from Python 3.12, and copy_tree remembers the directories it created,
        # so it can fail when the target was deleted (as done above) earlier in the same process.
        shutil.copytree(extracted_dir, target_dir, dirs_exist_ok=True)


def download_longyearbyen_examples(overwrite: bool = False) -> None:
    """
    Fetch the Longyearbyen example files.

    :param overwrite: If True, download the files again even if they already exist, and remove the processed
        example files. If False, the download only happens when at least one example file is missing.
    """
    target_dir = os.path.join(_EXAMPLES_DIRECTORY, "Longyearbyen", "data")

    # If we ask for overwrite, also remove the processed data, which was derived from the files being replaced
    if overwrite:
        for fn in _FILEPATHS_PROCESSED.values():
            if os.path.exists(fn):
                os.remove(fn)

    # A non-empty directory does not prove that the download completed: check every expected file, and
    # refresh the directory if one is missing.
    # NOTE(review): assumes all paths of _FILEPATHS_DATA are located inside target_dir -- confirm.
    all_files_present = all(map(os.path.isfile, _FILEPATHS_DATA.values()))

    download_and_extract_tarball(
        dir="data/Longyearbyen",
        target_dir=target_dir,
        overwrite=overwrite or not all_files_present,
    )


def process_coregistered_examples(name: str, overwrite: bool = False) -> None:
Expand Down

0 comments on commit 951e2a9

Please sign in to comment.