Skip to content

Commit

Permalink
Raise exception on HTTP error when retrieving file from storage locat…
Browse files Browse the repository at this point in the history
…ion (#1074)
  • Loading branch information
BryanFauble authored Feb 27, 2024
1 parent 9753472 commit 647ae52
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 20 deletions.
1 change: 1 addition & 0 deletions synapseclient/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3087,6 +3087,7 @@ def _download_from_url_multi_threaded(
object_id=object_id,
object_type=object_type,
path=temp_destination,
debug=self.debug,
)

multithread_download.download_file(self, request)
Expand Down
35 changes: 23 additions & 12 deletions synapseclient/core/multithread_download/download_threads.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,26 @@
import threading as _threading
except ImportError:
import dummy_threading as _threading
import datetime

import concurrent.futures
import datetime
import os
import time
from contextlib import contextmanager
from http import HTTPStatus
import os
from requests import Session, Response
from requests.adapters import HTTPAdapter
from typing import Generator, NamedTuple
from urllib.parse import urlparse, parse_qs
from urllib.parse import parse_qs, urlparse

from requests import Response, Session
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import time

from synapseclient.core.exceptions import SynapseError
from synapseclient.core.exceptions import SynapseError, _raise_for_status
from synapseclient.core.pool_provider import get_executor
from synapseclient.core.retry import (
with_retry,
RETRYABLE_CONNECTION_ERRORS,
RETRYABLE_CONNECTION_EXCEPTIONS,
with_retry,
)

# constants
Expand Down Expand Up @@ -57,12 +58,14 @@ class DownloadRequest(NamedTuple):
path : The local path to download the file to.
This path can be either an absolute path or
a relative path from where the code is executed to the download location.
debug: A boolean to specify if debug mode is on.
"""

file_handle_id: int
object_id: str
object_type: str
path: str
debug: bool = False


class TransferStatus(object):
Expand Down Expand Up @@ -207,18 +210,20 @@ def _get_new_session() -> Session:
return session


def _get_file_size(url: str) -> int:
def _get_file_size(url: str, debug: bool) -> int:
"""
Gets the size of the file located at url
Arguments:
url: The pre-signed url of the file
debug: A boolean to specify if debug mode is on
Returns:
The size of the file in bytes
"""
session = _get_new_session()
res_get = session.get(url, stream=True)
_raise_for_status(res_get, verbose=debug)
return int(res_get.headers["Content-Length"])


Expand Down Expand Up @@ -297,11 +302,17 @@ def __init__(self, syn, executor, max_concurrent_parts):
self._executor = executor
self._max_concurrent_parts = max_concurrent_parts

def download_file(self, request):
url_provider = PresignedUrlProvider(self._syn, request)
def download_file(self, request: DownloadRequest) -> None:
"""
Splits up and downloads a file in chunks from a URL.
Arguments:
request: A DownloadRequest object specifying what Synapse file to download.
"""
url_provider = PresignedUrlProvider(self._syn, request=request)

url_info = url_provider.get_info()
file_size = _get_file_size(url_info.url)
file_size = _get_file_size(url_info.url, request.debug)
chunk_range_generator = _generate_chunk_ranges(file_size)

self._prep_file(request)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
import concurrent.futures
import datetime
import os
import requests
import unittest.mock as mock
from unittest import TestCase

import pytest
from unittest import TestCase
import unittest.mock as mock
import requests
from requests import Response

import synapseclient.core.multithread_download.download_threads as download_threads
from synapseclient import Synapse
from synapseclient.core.exceptions import SynapseError, SynapseHTTPError
from synapseclient.core.multithread_download.download_threads import (
_MultithreadedDownloader,
download_file,
DownloadRequest,
PresignedUrlInfo,
PresignedUrlProvider,
TransferStatus,
_MultithreadedDownloader,
download_file,
)
from synapseclient.core.retry import DEFAULT_RETRIES

from synapseclient import Synapse
from synapseclient.core.exceptions import SynapseError


class TestPresignedUrlProvider(object):
def setup(self):
Expand Down Expand Up @@ -397,6 +397,57 @@ def test_download_file__error(self):
# should have been an attempt to cancel the Future
part_future_2.cancel.assert_called_once_with()

def test_download_file_error_in_retrieving_file_from_storage(self) -> None:
"""
Test that if an error occurs when retrieving the file from storage, the
error is raised.
"""

# GIVEN a download request
file_handle_id = 1234
entity_id = "syn123"
path = "/tmp/foo"
url = "http://foo.com/bar"
request = DownloadRequest(file_handle_id, entity_id, None, path)

# AND A mocked session
with mock.patch.object(
download_threads, "_get_new_session"
) as mock_get_new_session, mock.patch.object(
download_threads, "PresignedUrlProvider"
) as mock_url_provider_init:
mock_url_info = mock.create_autospec(PresignedUrlInfo, url=url)
mock_url_provider = mock.create_autospec(PresignedUrlProvider)
mock_url_provider.get_info.return_value = mock_url_info

mock_url_provider_init.return_value = mock_url_provider

# Create a mock session
mock_session = mock.Mock()
mock_get_new_session.return_value = mock_session

# AND the session.get() method returns a 403 error
# Create a mock Response
mock_response = mock.Mock(spec=Response)
mock_response.status_code = 403
mock_response.headers = {}
mock_response.reason = "mocked response text"

# Mock the .get() method on the session mock to return the mock Response
mock_session.get.return_value = mock_response

syn = mock.Mock()
executor = mock.Mock()
max_concurrent_parts = 5
downloader = _MultithreadedDownloader(syn, executor, max_concurrent_parts)

# WHEN the download_file method is called
with pytest.raises(SynapseHTTPError) as e:
downloader.download_file(request)

# THEN the error should be raised
assert "403 Client Error: mocked response text" in str(e.value)

@mock.patch.object(download_threads, "open")
def test_prep_file(self, mock_open):
"""Should open and close the file to create/truncate it"""
Expand Down

0 comments on commit 647ae52

Please sign in to comment.