Skip to content

Commit

Permalink
Remove the html5lib deprecated feature flag.
Browse files: browse the repository at this point in the history.
  • Loading branch information
sbidoul committed Jul 16, 2022
1 parent dc00479 commit bb2a3d7
Show file tree
Hide file tree
Showing 14 changed files with 21 additions and 151 deletions.
1 change: 1 addition & 0 deletions news/10825.removal.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove the ``html5lib`` deprecated feature flag.
1 change: 0 additions & 1 deletion src/pip/_internal/cli/cmdoptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,7 +1013,6 @@ def check_list_path_option(options: Values) -> None:
default=[],
choices=[
"legacy-resolver",
"html5lib",
],
help=("Enable deprecated functionality, that will be removed in the future."),
)
Expand Down
1 change: 0 additions & 1 deletion src/pip/_internal/cli/req_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,5 +499,4 @@ def _build_package_finder(
link_collector=link_collector,
selection_prefs=selection_prefs,
target_python=target_python,
use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled,
)
1 change: 0 additions & 1 deletion src/pip/_internal/commands/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ def _build_package_finder(
link_collector=link_collector,
selection_prefs=selection_prefs,
target_python=target_python,
use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled,
)

def get_available_package_versions(self, options: Values, args: List[Any]) -> None:
Expand Down
1 change: 0 additions & 1 deletion src/pip/_internal/commands/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ def _build_package_finder(
return PackageFinder.create(
link_collector=link_collector,
selection_prefs=selection_prefs,
use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled,
)

def run(self, options: Values, args: List[str]) -> int:
Expand Down
72 changes: 8 additions & 64 deletions src/pip/_internal/index/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
Union,
)

from pip._vendor import html5lib, requests
from pip._vendor import requests
from pip._vendor.requests import Response
from pip._vendor.requests.exceptions import RetryError, SSLError

Expand Down Expand Up @@ -191,27 +191,6 @@ def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]:
return None


def _determine_base_url(document: HTMLElement, page_url: str) -> str:
"""Determine the HTML document's base URL.
This looks for a ``<base>`` tag in the HTML document. If present, its href
attribute denotes the base URL of anchor tags in the document. If there is
no such tag (or if it does not have a valid href attribute), the HTML
file's URL is used as the base URL.
:param document: An HTML document representation. The current
implementation expects the result of ``html5lib.parse()``.
:param page_url: The URL of the HTML document.
TODO: Remove when `html5lib` is dropped.
"""
for base in document.findall(".//base"):
href = base.get("href")
if href is not None:
return href
return page_url


def _clean_url_path_part(part: str) -> str:
"""
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
Expand Down Expand Up @@ -313,9 +292,7 @@ def __hash__(self) -> int:


class ParseLinks(Protocol):
def __call__(
self, page: "IndexContent", use_deprecated_html5lib: bool
) -> Iterable[Link]:
def __call__(self, page: "IndexContent") -> Iterable[Link]:
...


Expand All @@ -327,49 +304,20 @@ def with_cached_index_content(fn: ParseLinks) -> ParseLinks:
"""

@functools.lru_cache(maxsize=None)
def wrapper(
cacheable_page: CacheablePageContent, use_deprecated_html5lib: bool
) -> List[Link]:
return list(fn(cacheable_page.page, use_deprecated_html5lib))
def wrapper(cacheable_page: CacheablePageContent) -> List[Link]:
return list(fn(cacheable_page.page))

@functools.wraps(fn)
def wrapper_wrapper(
page: "IndexContent", use_deprecated_html5lib: bool
) -> List[Link]:
def wrapper_wrapper(page: "IndexContent") -> List[Link]:
if page.cache_link_parsing:
return wrapper(CacheablePageContent(page), use_deprecated_html5lib)
return list(fn(page, use_deprecated_html5lib))
return wrapper(CacheablePageContent(page))
return list(fn(page))

return wrapper_wrapper


def _parse_links_html5lib(page: "IndexContent") -> Iterable[Link]:
"""
Parse an HTML document, and yield its anchor elements as Link objects.
TODO: Remove when `html5lib` is dropped.
"""
document = html5lib.parse(
page.content,
transport_encoding=page.encoding,
namespaceHTMLElements=False,
)

url = page.url
base_url = _determine_base_url(document, url)
for anchor in document.findall(".//a"):
link = _create_link_from_element(
anchor.attrib,
page_url=url,
base_url=base_url,
)
if link is None:
continue
yield link


@with_cached_index_content
def parse_links(page: "IndexContent", use_deprecated_html5lib: bool) -> Iterable[Link]:
def parse_links(page: "IndexContent") -> Iterable[Link]:
"""
Parse a Simple API's Index Content, and yield its anchor elements as Link objects.
"""
Expand Down Expand Up @@ -398,10 +346,6 @@ def parse_links(page: "IndexContent", use_deprecated_html5lib: bool) -> Iterable
hashes=file.get("hashes", {}),
)

if use_deprecated_html5lib:
yield from _parse_links_html5lib(page)
return

parser = HTMLLinkParser(page.url)
encoding = page.encoding or "utf-8"
parser.feed(page.content.decode(encoding))
Expand Down
7 changes: 1 addition & 6 deletions src/pip/_internal/index/package_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,6 @@ def __init__(
link_collector: LinkCollector,
target_python: TargetPython,
allow_yanked: bool,
use_deprecated_html5lib: bool,
format_control: Optional[FormatControl] = None,
candidate_prefs: Optional[CandidatePreferences] = None,
ignore_requires_python: Optional[bool] = None,
Expand All @@ -623,7 +622,6 @@ def __init__(
self._ignore_requires_python = ignore_requires_python
self._link_collector = link_collector
self._target_python = target_python
self._use_deprecated_html5lib = use_deprecated_html5lib

self.format_control = format_control

Expand All @@ -640,8 +638,6 @@ def create(
link_collector: LinkCollector,
selection_prefs: SelectionPreferences,
target_python: Optional[TargetPython] = None,
*,
use_deprecated_html5lib: bool,
) -> "PackageFinder":
"""Create a PackageFinder.
Expand All @@ -666,7 +662,6 @@ def create(
allow_yanked=selection_prefs.allow_yanked,
format_control=selection_prefs.format_control,
ignore_requires_python=selection_prefs.ignore_requires_python,
use_deprecated_html5lib=use_deprecated_html5lib,
)

@property
Expand Down Expand Up @@ -796,7 +791,7 @@ def process_project_url(
if index_response is None:
return []

page_links = list(parse_links(index_response, self._use_deprecated_html5lib))
page_links = list(parse_links(index_response))

with indent_log():
package_links = self.evaluate_links(
Expand Down
1 change: 0 additions & 1 deletion src/pip/_internal/self_outdated_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ def _get_current_remote_pip_version(
finder = PackageFinder.create(
link_collector=link_collector,
selection_prefs=selection_prefs,
use_deprecated_html5lib=("html5lib" in options.deprecated_features_enabled),
)
best_candidate = finder.find_best_candidate("pip").best_candidate
if best_candidate is None:
Expand Down
1 change: 0 additions & 1 deletion tests/functional/test_build_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ def run_with_build_env(
finder = PackageFinder.create(
link_collector=link_collector,
selection_prefs=selection_prefs,
use_deprecated_html5lib=False,
)
with global_tempdir_manager():
Expand Down
2 changes: 0 additions & 2 deletions tests/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ def make_test_finder(
allow_all_prereleases: bool = False,
session: Optional[PipSession] = None,
target_python: Optional[TargetPython] = None,
use_deprecated_html5lib: bool = False,
) -> PackageFinder:
"""
Create a PackageFinder for testing purposes.
Expand All @@ -134,7 +133,6 @@ def make_test_finder(
link_collector=link_collector,
selection_prefs=selection_prefs,
target_python=target_python,
use_deprecated_html5lib=use_deprecated_html5lib,
)


Expand Down
2 changes: 1 addition & 1 deletion tests/unit/resolution_resolvelib/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def finder(data: TestData) -> Iterator[PackageFinder]:
scope = SearchScope([str(data.packages)], [])
collector = LinkCollector(session, scope)
prefs = SelectionPreferences(allow_yanked=False)
finder = PackageFinder.create(collector, prefs, use_deprecated_html5lib=False)
finder = PackageFinder.create(collector, prefs)
yield finder


Expand Down
54 changes: 6 additions & 48 deletions tests/unit/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@
from unittest import mock

import pytest
from pip._vendor import html5lib, requests
from pip._vendor import requests

from pip._internal.exceptions import NetworkConnectionError
from pip._internal.index.collector import (
IndexContent,
LinkCollector,
_clean_link,
_clean_url_path,
_determine_base_url,
_get_index_content,
_get_simple_response,
_make_index_content,
Expand Down Expand Up @@ -249,33 +248,6 @@ def test_get_simple_response_dont_log_clear_text_password(
]


@pytest.mark.parametrize(
("html", "url", "expected"),
[
(b"<html></html>", "https://example.com/", "https://example.com/"),
(
b'<html><head><base href="https://foo.example.com/"></head></html>',
"https://example.com/",
"https://foo.example.com/",
),
(
b"<html><head>"
b'<base><base href="https://foo.example.com/">'
b"</head></html>",
"https://example.com/",
"https://foo.example.com/",
),
],
)
def test_determine_base_url(html: bytes, url: str, expected: str) -> None:
document = html5lib.parse(
html,
transport_encoding=None,
namespaceHTMLElements=False,
)
assert _determine_base_url(document, url) == expected


@pytest.mark.parametrize(
("path", "expected"),
[
Expand Down Expand Up @@ -451,7 +423,7 @@ def _test_parse_links_data_attribute(
# the page content isn't cached.
url=f"https://example.com/simple-{uuid.uuid4()}/",
)
links = list(parse_links(page, use_deprecated_html5lib=False))
links = list(parse_links(page))
(link,) = links
actual = getattr(link, attr)
assert actual == expected
Expand Down Expand Up @@ -513,7 +485,7 @@ def test_parse_links_json() -> None:
# the page content isn't cached.
url=f"https://example.com/simple-{uuid.uuid4()}/",
)
links = list(parse_links(page, use_deprecated_html5lib=False))
links = list(parse_links(page))

assert links == [
Link(
Expand Down Expand Up @@ -597,33 +569,19 @@ def test_parse_links_caches_same_page_by_url() -> None:
cache_link_parsing=False,
)

parsed_links_1 = list(parse_links(page_1, use_deprecated_html5lib=False))
parsed_links_1 = list(parse_links(page_1))
assert len(parsed_links_1) == 1
assert "pkg1" in parsed_links_1[0].url

parsed_links_2 = list(parse_links(page_2, use_deprecated_html5lib=False))
parsed_links_2 = list(parse_links(page_2))
assert parsed_links_2 == parsed_links_1

parsed_links_3 = list(parse_links(page_3, use_deprecated_html5lib=False))
parsed_links_3 = list(parse_links(page_3))
assert len(parsed_links_3) == 1
assert parsed_links_3 != parsed_links_1
assert "pkg2" in parsed_links_3[0].url


def test_parse_link_handles_deprecated_usage_properly() -> None:
html = b'<a href="/pkg1-1.0.tar.gz"></a><a href="/pkg1-2.0.tar.gz"></a>'
url = "https://example.com/simple/"
page = IndexContent(
html, "text/html", encoding=None, url=url, cache_link_parsing=False
)

parsed_links = list(parse_links(page, use_deprecated_html5lib=True))

assert len(parsed_links) == 2
assert "pkg1-1.0" in parsed_links[0].url
assert "pkg1-2.0" in parsed_links[1].url


@mock.patch("pip._internal.index.collector.raise_for_status")
def test_request_http_error(
mock_raise_for_status: mock.Mock, caplog: pytest.LogCaptureFixture
Expand Down
19 changes: 4 additions & 15 deletions tests/unit/test_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,7 @@ def test_incorrect_case_file_index(data: TestData) -> None:


@pytest.mark.network
@pytest.mark.parametrize("use_deprecated_html5lib", [False, True])
def test_finder_detects_latest_already_satisfied_find_links(
data: TestData, use_deprecated_html5lib: bool
) -> None:
def test_finder_detects_latest_already_satisfied_find_links(data: TestData) -> None:
"""Test PackageFinder detects latest already satisfied using find-links"""
req = install_req_from_line("simple", None)
# the latest simple in local pkgs is 3.0
Expand All @@ -93,19 +90,14 @@ def test_finder_detects_latest_already_satisfied_find_links(
version=parse_version(latest_version),
)
req.satisfied_by = satisfied_by
finder = make_test_finder(
find_links=[data.find_links], use_deprecated_html5lib=use_deprecated_html5lib
)
finder = make_test_finder(find_links=[data.find_links])

with pytest.raises(BestVersionAlreadyInstalled):
finder.find_requirement(req, True)


@pytest.mark.network
@pytest.mark.parametrize("use_deprecated_html5lib", [False, True])
def test_finder_detects_latest_already_satisfied_pypi_links(
use_deprecated_html5lib: bool,
) -> None:
def test_finder_detects_latest_already_satisfied_pypi_links() -> None:
"""Test PackageFinder detects latest already satisfied using pypi links"""
req = install_req_from_line("initools", None)
# the latest initools on PyPI is 0.3.1
Expand All @@ -115,10 +107,7 @@ def test_finder_detects_latest_already_satisfied_pypi_links(
version=parse_version(latest_version),
)
req.satisfied_by = satisfied_by
finder = make_test_finder(
index_urls=["http://pypi.org/simple/"],
use_deprecated_html5lib=use_deprecated_html5lib,
)
finder = make_test_finder(index_urls=["http://pypi.org/simple/"])

with pytest.raises(BestVersionAlreadyInstalled):
finder.find_requirement(req, True)
Expand Down
Loading

0 comments on commit bb2a3d7

Please sign in to comment.