From 03859cbb2ceac4d9fe8bcd1644340235f43ea520 Mon Sep 17 00:00:00 2001
From: Yichi Yang
Date: Fri, 10 Jun 2022 14:40:16 -0700
Subject: [PATCH] Resume incomplete download

When a response body ends before Content-Length bytes have been received
and the server advertised "Accept-Ranges: bytes", request the remainder
with a Range header and keep streaming from the first missing byte.

---
Review notes (not part of the commit message):
 - Resuming stops unless the server answers 206 Partial Content; any other
   successful status does not start at the requested offset, and appending
   its body would corrupt the file.
 - Resuming stops when a resumed request yields no data, so the loop cannot
   spin forever.
 - The post-image blob id on the "index" line predates these review changes.

 src/pip/_internal/network/download.py | 69 +++++++++++++++++++++++++--
 1 file changed, 64 insertions(+), 5 deletions(-)

diff --git a/src/pip/_internal/network/download.py b/src/pip/_internal/network/download.py
index 79b82a570e5..9f827db00cf 100644
--- a/src/pip/_internal/network/download.py
+++ b/src/pip/_internal/network/download.py
@@ -26,9 +26,57 @@ def _get_http_response_size(resp: Response) -> Optional[int]:
     except (ValueError, KeyError, TypeError):
         return None
 
 
+def _get_http_accept_ranges(resp: Response) -> Optional[str]:
+    """Return the response's Accept-Ranges header value, or None if absent."""
+    return resp.headers.get("accept-ranges", None)
+
+
+def _resume_if_incomplete(
+    resp: Response, session: PipSession, link: Link
+) -> Iterable[bytes]:
+    """Yield the body of resp, fetching any missing tail with Range requests.
+
+    After the initial body is exhausted, the number of bytes received is
+    compared with the advertised Content-Length. If the body came up short
+    and the server advertised "Accept-Ranges: bytes", the remainder is
+    requested again starting at the first missing byte.
+    """
+    received_length = 0
+    for chunk in response_chunks(resp, CONTENT_CHUNK_SIZE):
+        received_length += len(chunk)
+        yield chunk
+
+    total_length = _get_http_response_size(resp)
+    if total_length is None or _get_http_accept_ranges(resp) != "bytes":
+        return
+
+    while received_length < total_length:
+        logger.info(
+            "Resuming incomplete download (%s received)",
+            format_size(received_length),
+        )
+        resume_resp = _http_get_download(session, link, range_start=received_length)
+        if resume_resp.status_code != 206:
+            # Anything but 206 Partial Content does not start at the requested
+            # offset; appending that body would corrupt the download.
+            resume_resp.close()
+            logger.warning("Server did not honor range request; cannot resume")
+            return
+
+        resumed_from = received_length
+        for chunk in response_chunks(resume_resp, CONTENT_CHUNK_SIZE):
+            received_length += len(chunk)
+            yield chunk
+        if received_length == resumed_from:
+            # No progress was made; stop instead of retrying forever.
+            logger.warning("Resumed download returned no data; giving up")
+            return
+
+
 def _prepare_download(
     resp: Response,
+    session: PipSession,
     link: Link,
     progress_bar: str,
 ) -> Iterable[bytes]:
@@ -60,7 +108,7 @@ def _prepare_download(
     else:
         show_progress = False
 
-    chunks = response_chunks(resp, CONTENT_CHUNK_SIZE)
+    chunks = _resume_if_incomplete(resp, session, link)
 
     if not show_progress:
         return chunks
@@ -112,9 +160,20 @@ def _get_http_response_filename(resp: Response, link: Link) -> str:
     return filename
 
 
-def _http_get_download(session: PipSession, link: Link) -> Response:
+def _http_get_download(
+    session: PipSession, link: Link, range_start: Optional[int] = None
+) -> Response:
+    """Issue a streaming GET for the link's URL with its fragment removed.
+
+    When range_start is given, a "Range: bytes=<range_start>-" header is
+    sent so that only the remainder of the file is requested.
+    """
     target_url = link.url.split("#", 1)[0]
-    resp = session.get(target_url, headers=HEADERS, stream=True)
+    if range_start is not None:
+        headers = {**HEADERS, "Range": "bytes={}-".format(range_start)}
+    else:
+        headers = HEADERS
+    resp = session.get(target_url, headers=headers, stream=True)
     raise_for_status(resp)
     return resp
 
@@ -142,7 +201,7 @@ def __call__(self, link: Link, location: str) -> Tuple[str, str]:
         filename = _get_http_response_filename(resp, link)
         filepath = os.path.join(location, filename)
 
-        chunks = _prepare_download(resp, link, self._progress_bar)
+        chunks = _prepare_download(resp, self._session, link, self._progress_bar)
         with open(filepath, "wb") as content_file:
             for chunk in chunks:
                 content_file.write(chunk)
@@ -178,7 +237,7 @@ def __call__(
             filename = _get_http_response_filename(resp, link)
             filepath = os.path.join(location, filename)
 
-            chunks = _prepare_download(resp, link, self._progress_bar)
+            chunks = _prepare_download(resp, self._session, link, self._progress_bar)
             with open(filepath, "wb") as content_file:
                 for chunk in chunks:
                     content_file.write(chunk)