From bb2a3d741006c180e3878d0843e0b0ebf3521f52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Thu, 14 Jul 2022 18:15:07 +0200 Subject: [PATCH] Remove the html5lib deprecated feature flag. --- news/10825.removal.rst | 1 + src/pip/_internal/cli/cmdoptions.py | 1 - src/pip/_internal/cli/req_command.py | 1 - src/pip/_internal/commands/index.py | 1 - src/pip/_internal/commands/list.py | 1 - src/pip/_internal/index/collector.py | 72 +++----------------- src/pip/_internal/index/package_finder.py | 7 +- src/pip/_internal/self_outdated_check.py | 1 - tests/functional/test_build_env.py | 1 - tests/lib/__init__.py | 2 - tests/unit/resolution_resolvelib/conftest.py | 2 +- tests/unit/test_collector.py | 54 ++------------- tests/unit/test_finder.py | 19 ++---- tests/unit/test_index.py | 9 --- 14 files changed, 21 insertions(+), 151 deletions(-) create mode 100644 news/10825.removal.rst diff --git a/news/10825.removal.rst b/news/10825.removal.rst new file mode 100644 index 00000000000..542a5f775aa --- /dev/null +++ b/news/10825.removal.rst @@ -0,0 +1 @@ +Remove the ``html5lib`` deprecated feature flag. diff --git a/src/pip/_internal/cli/cmdoptions.py b/src/pip/_internal/cli/cmdoptions.py index 35ae1e68c2e..47ed92779e9 100644 --- a/src/pip/_internal/cli/cmdoptions.py +++ b/src/pip/_internal/cli/cmdoptions.py @@ -1013,7 +1013,6 @@ def check_list_path_option(options: Values) -> None: default=[], choices=[ "legacy-resolver", - "html5lib", ], help=("Enable deprecated functionality, that will be removed in the future."), ) diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py index b289426cfe8..1044809f040 100644 --- a/src/pip/_internal/cli/req_command.py +++ b/src/pip/_internal/cli/req_command.py @@ -499,5 +499,4 @@ def _build_package_finder( link_collector=link_collector, selection_prefs=selection_prefs, target_python=target_python, - use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled, ) diff --git a/src/pip/_internal/commands/index.py b/src/pip/_internal/commands/index.py index 9d8aae3b542..b4bf0ac06e1 100644 --- a/src/pip/_internal/commands/index.py +++ b/src/pip/_internal/commands/index.py @@ -97,7 +97,6 @@ def _build_package_finder( link_collector=link_collector, selection_prefs=selection_prefs, target_python=target_python, - use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled, ) def get_available_package_versions(self, options: Values, args: List[Any]) -> None: diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index fc229efc242..a9b08a0bc88 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -149,7 +149,6 @@ def _build_package_finder( return PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, - use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled, ) def run(self, options: Values, args: List[str]) -> int: diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index 04646ae1121..6e5dac5ad3c 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -29,7 +29,7 @@ Union, ) -from pip._vendor import html5lib, requests +from pip._vendor import requests from pip._vendor.requests import Response from pip._vendor.requests.exceptions import RetryError, SSLError @@ -191,27 +191,6 @@ def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]: return None -def _determine_base_url(document: HTMLElement, page_url: str) -> str: - """Determine the HTML document's base URL. - - This looks for a ```` tag in the HTML document. If present, its href - attribute denotes the base URL of anchor tags in the document. If there is - no such tag (or if it does not have a valid href attribute), the HTML - file's URL is used as the base URL. - - :param document: An HTML document representation. The current - implementation expects the result of ``html5lib.parse()``. - :param page_url: The URL of the HTML document. - - TODO: Remove when `html5lib` is dropped. - """ - for base in document.findall(".//base"): - href = base.get("href") - if href is not None: - return href - return page_url - - def _clean_url_path_part(part: str) -> str: """ Clean a "part" of a URL path (i.e. after splitting on "@" characters). @@ -313,9 +292,7 @@ def __hash__(self) -> int: class ParseLinks(Protocol): - def __call__( - self, page: "IndexContent", use_deprecated_html5lib: bool - ) -> Iterable[Link]: + def __call__(self, page: "IndexContent") -> Iterable[Link]: ... @@ -327,49 +304,20 @@ def with_cached_index_content(fn: ParseLinks) -> ParseLinks: """ @functools.lru_cache(maxsize=None) - def wrapper( - cacheable_page: CacheablePageContent, use_deprecated_html5lib: bool - ) -> List[Link]: - return list(fn(cacheable_page.page, use_deprecated_html5lib)) + def wrapper(cacheable_page: CacheablePageContent) -> List[Link]: + return list(fn(cacheable_page.page)) @functools.wraps(fn) - def wrapper_wrapper( - page: "IndexContent", use_deprecated_html5lib: bool - ) -> List[Link]: + def wrapper_wrapper(page: "IndexContent") -> List[Link]: if page.cache_link_parsing: - return wrapper(CacheablePageContent(page), use_deprecated_html5lib) - return list(fn(page, use_deprecated_html5lib)) + return wrapper(CacheablePageContent(page)) + return list(fn(page)) return wrapper_wrapper -def _parse_links_html5lib(page: "IndexContent") -> Iterable[Link]: - """ - Parse an HTML document, and yield its anchor elements as Link objects. - - TODO: Remove when `html5lib` is dropped. - """ - document = html5lib.parse( - page.content, - transport_encoding=page.encoding, - namespaceHTMLElements=False, - ) - - url = page.url - base_url = _determine_base_url(document, url) - for anchor in document.findall(".//a"): - link = _create_link_from_element( - anchor.attrib, - page_url=url, - base_url=base_url, - ) - if link is None: - continue - yield link - - @with_cached_index_content -def parse_links(page: "IndexContent", use_deprecated_html5lib: bool) -> Iterable[Link]: +def parse_links(page: "IndexContent") -> Iterable[Link]: """ Parse a Simple API's Index Content, and yield its anchor elements as Link objects. """ @@ -398,10 +346,6 @@ def parse_links(page: "IndexContent", use_deprecated_html5lib: bool) -> Iterable hashes=file.get("hashes", {}), ) - if use_deprecated_html5lib: - yield from _parse_links_html5lib(page) - return - parser = HTMLLinkParser(page.url) encoding = page.encoding or "utf-8" parser.feed(page.content.decode(encoding)) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index dbb6a64066c..9bf247f0246 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -598,7 +598,6 @@ def __init__( link_collector: LinkCollector, target_python: TargetPython, allow_yanked: bool, - use_deprecated_html5lib: bool, format_control: Optional[FormatControl] = None, candidate_prefs: Optional[CandidatePreferences] = None, ignore_requires_python: Optional[bool] = None, @@ -623,7 +622,6 @@ def __init__( self._ignore_requires_python = ignore_requires_python self._link_collector = link_collector self._target_python = target_python - self._use_deprecated_html5lib = use_deprecated_html5lib self.format_control = format_control @@ -640,8 +638,6 @@ def create( link_collector: LinkCollector, selection_prefs: SelectionPreferences, target_python: Optional[TargetPython] = None, - *, - use_deprecated_html5lib: bool, ) -> "PackageFinder": """Create a PackageFinder. @@ -666,7 +662,6 @@ def create( allow_yanked=selection_prefs.allow_yanked, format_control=selection_prefs.format_control, ignore_requires_python=selection_prefs.ignore_requires_python, - use_deprecated_html5lib=use_deprecated_html5lib, ) @property @@ -796,7 +791,7 @@ def process_project_url( if index_response is None: return [] - page_links = list(parse_links(index_response, self._use_deprecated_html5lib)) + page_links = list(parse_links(index_response)) with indent_log(): package_links = self.evaluate_links( diff --git a/src/pip/_internal/self_outdated_check.py b/src/pip/_internal/self_outdated_check.py index ad62dd27b00..83cb0c543c1 100644 --- a/src/pip/_internal/self_outdated_check.py +++ b/src/pip/_internal/self_outdated_check.py @@ -173,7 +173,6 @@ def _get_current_remote_pip_version( finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, - use_deprecated_html5lib=("html5lib" in options.deprecated_features_enabled), ) best_candidate = finder.find_best_candidate("pip").best_candidate if best_candidate is None: diff --git a/tests/functional/test_build_env.py b/tests/functional/test_build_env.py index d2ab79681ed..6936246183c 100644 --- a/tests/functional/test_build_env.py +++ b/tests/functional/test_build_env.py @@ -49,7 +49,6 @@ def run_with_build_env( finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, - use_deprecated_html5lib=False, ) with global_tempdir_manager(): diff --git a/tests/lib/__init__.py b/tests/lib/__init__.py index 43624c16614..8774d8bc144 100644 --- a/tests/lib/__init__.py +++ b/tests/lib/__init__.py @@ -115,7 +115,6 @@ def make_test_finder( allow_all_prereleases: bool = False, session: Optional[PipSession] = None, target_python: Optional[TargetPython] = None, - use_deprecated_html5lib: bool = False, ) -> PackageFinder: """ Create a PackageFinder for testing purposes. @@ -134,7 +133,6 @@ def make_test_finder( link_collector=link_collector, selection_prefs=selection_prefs, target_python=target_python, - use_deprecated_html5lib=use_deprecated_html5lib, ) diff --git a/tests/unit/resolution_resolvelib/conftest.py b/tests/unit/resolution_resolvelib/conftest.py index a8889293bc3..9ef9f8c5c18 100644 --- a/tests/unit/resolution_resolvelib/conftest.py +++ b/tests/unit/resolution_resolvelib/conftest.py @@ -26,7 +26,7 @@ def finder(data: TestData) -> Iterator[PackageFinder]: scope = SearchScope([str(data.packages)], []) collector = LinkCollector(session, scope) prefs = SelectionPreferences(allow_yanked=False) - finder = PackageFinder.create(collector, prefs, use_deprecated_html5lib=False) + finder = PackageFinder.create(collector, prefs) yield finder diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index eff2594cad9..3afc5210dc7 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -10,7 +10,7 @@ from unittest import mock import pytest -from pip._vendor import html5lib, requests +from pip._vendor import requests from pip._internal.exceptions import NetworkConnectionError from pip._internal.index.collector import ( @@ -18,7 +18,6 @@ LinkCollector, _clean_link, _clean_url_path, - _determine_base_url, _get_index_content, _get_simple_response, _make_index_content, @@ -249,33 +248,6 @@ def test_get_simple_response_dont_log_clear_text_password( ] -@pytest.mark.parametrize( - ("html", "url", "expected"), - [ - (b"", "https://example.com/", "https://example.com/"), - ( - b'', - "https://example.com/", - "https://foo.example.com/", - ), - ( - b"" - b'' - b"", - "https://example.com/", - "https://foo.example.com/", - ), - ], -) -def test_determine_base_url(html: bytes, url: str, expected: str) -> None: - document = html5lib.parse( - html, - transport_encoding=None, - namespaceHTMLElements=False, - ) - assert _determine_base_url(document, url) == expected - - @pytest.mark.parametrize( ("path", "expected"), [ @@ -451,7 +423,7 @@ def _test_parse_links_data_attribute( # the page content isn't cached. url=f"https://example.com/simple-{uuid.uuid4()}/", ) - links = list(parse_links(page, use_deprecated_html5lib=False)) + links = list(parse_links(page)) (link,) = links actual = getattr(link, attr) assert actual == expected @@ -513,7 +485,7 @@ def test_parse_links_json() -> None: # the page content isn't cached. url=f"https://example.com/simple-{uuid.uuid4()}/", ) - links = list(parse_links(page, use_deprecated_html5lib=False)) + links = list(parse_links(page)) assert links == [ Link( @@ -597,33 +569,19 @@ def test_parse_links_caches_same_page_by_url() -> None: cache_link_parsing=False, ) - parsed_links_1 = list(parse_links(page_1, use_deprecated_html5lib=False)) + parsed_links_1 = list(parse_links(page_1)) assert len(parsed_links_1) == 1 assert "pkg1" in parsed_links_1[0].url - parsed_links_2 = list(parse_links(page_2, use_deprecated_html5lib=False)) + parsed_links_2 = list(parse_links(page_2)) assert parsed_links_2 == parsed_links_1 - parsed_links_3 = list(parse_links(page_3, use_deprecated_html5lib=False)) + parsed_links_3 = list(parse_links(page_3)) assert len(parsed_links_3) == 1 assert parsed_links_3 != parsed_links_1 assert "pkg2" in parsed_links_3[0].url -def test_parse_link_handles_deprecated_usage_properly() -> None: - html = b'' - url = "https://example.com/simple/" - page = IndexContent( - html, "text/html", encoding=None, url=url, cache_link_parsing=False - ) - - parsed_links = list(parse_links(page, use_deprecated_html5lib=True)) - - assert len(parsed_links) == 2 - assert "pkg1-1.0" in parsed_links[0].url - assert "pkg1-2.0" in parsed_links[1].url - - @mock.patch("pip._internal.index.collector.raise_for_status") def test_request_http_error( mock_raise_for_status: mock.Mock, caplog: pytest.LogCaptureFixture diff --git a/tests/unit/test_finder.py b/tests/unit/test_finder.py index 16c24014459..366b7eeb4d1 100644 --- a/tests/unit/test_finder.py +++ b/tests/unit/test_finder.py @@ -80,10 +80,7 @@ def test_incorrect_case_file_index(data: TestData) -> None: @pytest.mark.network -@pytest.mark.parametrize("use_deprecated_html5lib", [False, True]) -def test_finder_detects_latest_already_satisfied_find_links( - data: TestData, use_deprecated_html5lib: bool -) -> None: +def test_finder_detects_latest_already_satisfied_find_links(data: TestData) -> None: """Test PackageFinder detects latest already satisfied using find-links""" req = install_req_from_line("simple", None) # the latest simple in local pkgs is 3.0 @@ -93,19 +90,14 @@ def test_finder_detects_latest_already_satisfied_find_links( version=parse_version(latest_version), ) req.satisfied_by = satisfied_by - finder = make_test_finder( - find_links=[data.find_links], use_deprecated_html5lib=use_deprecated_html5lib - ) + finder = make_test_finder(find_links=[data.find_links]) with pytest.raises(BestVersionAlreadyInstalled): finder.find_requirement(req, True) @pytest.mark.network -@pytest.mark.parametrize("use_deprecated_html5lib", [False, True]) -def test_finder_detects_latest_already_satisfied_pypi_links( - use_deprecated_html5lib: bool, -) -> None: +def test_finder_detects_latest_already_satisfied_pypi_links() -> None: """Test PackageFinder detects latest already satisfied using pypi links""" req = install_req_from_line("initools", None) # the latest initools on PyPI is 0.3.1 @@ -115,10 +107,7 @@ def test_finder_detects_latest_already_satisfied_pypi_links( version=parse_version(latest_version), ) req.satisfied_by = satisfied_by - finder = make_test_finder( - index_urls=["http://pypi.org/simple/"], - use_deprecated_html5lib=use_deprecated_html5lib, - ) + finder = make_test_finder(index_urls=["http://pypi.org/simple/"]) with pytest.raises(BestVersionAlreadyInstalled): finder.find_requirement(req, True) diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 7051268545b..cd3c748b7aa 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -603,7 +603,6 @@ def test_create__candidate_prefs( finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, - use_deprecated_html5lib=False, ) candidate_prefs = finder._candidate_prefs assert candidate_prefs.allow_all_prereleases == allow_all_prereleases @@ -620,7 +619,6 @@ def test_create__link_collector(self) -> None: finder = PackageFinder.create( link_collector=link_collector, selection_prefs=SelectionPreferences(allow_yanked=True), - use_deprecated_html5lib=False, ) assert finder._link_collector is link_collector @@ -638,7 +636,6 @@ def test_create__target_python(self) -> None: link_collector=link_collector, selection_prefs=SelectionPreferences(allow_yanked=True), target_python=target_python, - use_deprecated_html5lib=False, ) actual_target_python = finder._target_python # The target_python attribute should be set as is. @@ -658,7 +655,6 @@ def test_create__target_python_none(self) -> None: link_collector=link_collector, selection_prefs=SelectionPreferences(allow_yanked=True), target_python=None, - use_deprecated_html5lib=False, ) # Spot-check the default TargetPython object. actual_target_python = finder._target_python @@ -678,7 +674,6 @@ def test_create__allow_yanked(self, allow_yanked: bool) -> None: finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, - use_deprecated_html5lib=False, ) assert finder._allow_yanked == allow_yanked @@ -698,7 +693,6 @@ def test_create__ignore_requires_python(self, ignore_requires_python: bool) -> N finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, - use_deprecated_html5lib=False, ) assert finder._ignore_requires_python == ignore_requires_python @@ -718,7 +712,6 @@ def test_create__format_control(self) -> None: finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, - use_deprecated_html5lib=False, ) actual_format_control = finder.format_control assert actual_format_control is format_control @@ -759,7 +752,6 @@ def test_make_link_evaluator( allow_yanked=allow_yanked, format_control=format_control, ignore_requires_python=ignore_requires_python, - use_deprecated_html5lib=False, ) # Pass a project_name that will be different from canonical_name. @@ -808,7 +800,6 @@ def test_make_candidate_evaluator( target_python=target_python, allow_yanked=True, candidate_prefs=candidate_prefs, - use_deprecated_html5lib=False, ) specifier = SpecifierSet()