Skip to content

Commit

Permalink
add error for invalid data locations
Browse files Browse the repository at this point in the history
  • Loading branch information
Moritz-Alexander-Kern committed Jun 14, 2024
1 parent ae5d4c4 commit 7de8853
Showing 1 changed file with 70 additions and 65 deletions.
135 changes: 70 additions & 65 deletions elephant/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import ssl
import tempfile
from urllib.parse import urlparse
import warnings
from os import environ, getenv
from pathlib import Path
Expand Down Expand Up @@ -75,72 +76,76 @@ def download(url, filepath=None, checksum=None, verbose=True):
def download_datasets(repo_path, filepath=None, checksum=None,
verbose=True):
r"""
This function can be used to download files from elephant-data using
only the path relative to the root of the elephant-data repository.
The default URL used, points to elephants corresponding release of
elephant-data.
Different versions of the elephant package may require different
versions of elephant-data.
e.g. the following URLs:
- https://web.gin.g-node.org/NeuralEnsemble/elephant-data/raw/0.0.1
points to release v0.0.1.
- https://web.gin.g-node.org/NeuralEnsemble/elephant-data/raw/master
always points to the latest state of elephant-data.
- https://datasets.python-elephant.org/
points to the root of elephant data
To change this URL, use the environment variable `ELEPHANT_DATA_LOCATION`.
When using data, which is not yet contained in the master branch or a
release of elephant data, e.g. during development, this variable can
be used to change the default URL.
For example to use data on branch `multitaper`, change the
`ELEPHANT_DATA_LOCATION` to
https://web.gin.g-node.org/NeuralEnsemble/elephant-data/raw/multitaper.
For a complete example, see Examples section.
This function can be used to download files from elephant-data using
only the path relative to the root of the elephant-data repository.
The default URL points to the elephant-data release that corresponds to
the current version of elephant.
Different versions of the elephant package may require different
versions of elephant-data.
e.g. the following URLs:
- https://web.gin.g-node.org/NeuralEnsemble/elephant-data/raw/0.0.1
points to release v0.0.1.
- https://web.gin.g-node.org/NeuralEnsemble/elephant-data/raw/master
always points to the latest state of elephant-data.
- https://datasets.python-elephant.org/
points to the root of elephant data
To change this URL, use the environment variable `ELEPHANT_DATA_LOCATION`.
When using data that is not yet contained in the master branch or in a
release of elephant-data, e.g. during development, this variable can
be used to override the default URL.
For example, to use data on branch `multitaper`, set
`ELEPHANT_DATA_LOCATION` to
https://web.gin.g-node.org/NeuralEnsemble/elephant-data/raw/multitaper.
For a complete example, see Examples section.
To use a local copy of elephant-data, use the environment variable
`ELEPHANT_DATA_LOCATION`, e.g. set to /home/user/elephant-data.
To use a local copy of elephant-data, use the environment variable
`ELEPHANT_DATA_LOCATION`, e.g. set to /home/user/elephant-data.
Parameters
----------
repo_path : str
String denoting the path relative to elephant-data repository root
filepath : str, optional
Path to temporary folder where the downloaded files will be stored
checksum : str, optional
Checksum to verify data integrity after download
verbose : bool, optional
Whether to disable the entire progressbar wrapper [].
If set to None, disable on non-TTY.
Default: True
Returns
-------
filepath : pathlib.Path
Path to downloaded files.
Notes
-----
The default URL always points to elephant-data. Please
do not change its value. For development purposes use the environment
variable 'ELEPHANT_DATA_LOCATION'.
Examples
--------
The following example downloads a file from elephant-data branch
'multitaper', by setting the environment variable to the branch URL:
>>> import os
>>> from elephant.datasets import download_datasets
>>> os.environ["ELEPHANT_DATA_LOCATION"] = "https://web.gin.g-node.org/NeuralEnsemble/elephant-data/raw/multitaper" # noqa
>>> download_datasets("unittest/spectral/multitaper_psd/data/time_series.npy") # doctest: +SKIP
PosixPath('/tmp/elephant/time_series.npy')
"""
Parameters
----------
repo_path : str
String denoting the path relative to elephant-data repository root
filepath : str, optional
Path to temporary folder where the downloaded files will be stored
checksum : str, optional
Checksum to verify data integrity after download
verbose : bool, optional
Whether to disable the entire progressbar wrapper.
If set to None, disable on non-TTY.
Default: True
Returns
-------
filepath : pathlib.Path
Path to downloaded files.
Notes
-----
The default URL always points to elephant-data. Please
do not change its value. For development purposes use the environment
variable 'ELEPHANT_DATA_LOCATION'.
Examples
--------
The following example downloads a file from elephant-data branch
'multitaper', by setting the environment variable to the branch URL:
>>> import os
>>> from elephant.datasets import download_datasets
>>> os.environ["ELEPHANT_DATA_LOCATION"] = "https://web.gin.g-node.org/NeuralEnsemble/elephant-data/raw/multitaper" # noqa
>>> download_datasets("unittest/spectral/multitaper_psd/data/time_series.npy") # doctest: +SKIP
PosixPath('/tmp/elephant/time_series.npy')
"""

if 'ELEPHANT_DATA_LOCATION' in environ: # user did set path or URL
if os.path.exists(getenv('ELEPHANT_DATA_LOCATION')):
return Path(f"{getenv('ELEPHANT_DATA_LOCATION')}/{repo_path}")
env_var = 'ELEPHANT_DATA_LOCATION'
if env_var in os.environ: # user did set path or URL
if os.path.exists(getenv(env_var)):
return Path(f"{getenv(env_var)}/{repo_path}")
elif urlparse(getenv(env_var)).scheme not in ('http', 'https'):
raise ValueError(f"The environment variable {env_var} must be set to either an existing file system path "
f"or a valid URL. Given value: '{getenv(env_var)}' is neither.")

# this url redirects to the current location of elephant-data
url_to_root = "https://datasets.python-elephant.org/"
Expand All @@ -149,7 +154,7 @@ def download_datasets(repo_path, filepath=None, checksum=None,
# (version elephant is equal to version elephant-data)
default_url = url_to_root + f"raw/v{_get_version()}"

if 'ELEPHANT_DATA_LOCATION' not in environ: # user did not set URL
if env_var not in environ: # user did not set URL
# is 'version-URL' available? (not for elephant development version)
try:
urlopen(default_url+'/README.md')
Expand Down Expand Up @@ -177,7 +182,7 @@ def download_datasets(repo_path, filepath=None, checksum=None,
warnings.warn(f"Data URL:{default_url}, error: {error}."
f"{error.reason}")

url = f"{getenv('ELEPHANT_DATA_LOCATION', default_url)}/{repo_path}"
url = f"{getenv(env_var, default_url)}/{repo_path}"

return download(url, filepath, checksum, verbose)

Expand Down

0 comments on commit 7de8853

Please sign in to comment.