Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add include/exclude filter support for pull-through caching #747

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 25 additions & 19 deletions pulp_python/app/pypi/views.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import requests

Expand All @@ -23,7 +24,7 @@
from packaging.utils import canonicalize_name
from urllib.parse import urljoin, urlparse, urlunsplit
from pathlib import PurePath
from pypi_simple import parse_links_stream_response
from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage

from pulpcore.plugin.viewsets import OperationPostponedResponse
from pulpcore.plugin.tasking import dispatch
Expand All @@ -45,6 +46,7 @@
python_content_to_json,
PYPI_LAST_SERIAL,
PYPI_SERIAL_CONSTANT,
get_remote_package_filter,
)

from pulp_python.app import tasks
Expand Down Expand Up @@ -232,27 +234,31 @@ def list(self, request, path):

def pull_through_package_simple(self, package, path, remote):
    """Build the simple detail page for ``package`` from the remote's index.

    The project's simple page is downloaded from ``remote`` (JSON preferred,
    HTML accepted), its releases are run through the remote's
    include/exclude filter, and every surviving file is rewritten to point
    at this distribution's content URL with a ``redirect`` query parameter.

    Args:
        package: Name of the project being requested.
        path: Base path of the distribution, used as the link prefix.
        remote: Remote whose index is queried and whose filters apply.

    Returns:
        A response wrapping the rendered simple detail page.

    Raises:
        Http404: If the project is rejected by the remote's filters or the
            upstream page could not be downloaded.
    """
    def parse_package(dis_package):
        """Map one upstream distribution to ``(filename, url, sha256)``."""
        parsed = urlparse(dis_package.url)
        # Keep only scheme, host and path of the upstream URL.
        stripped_url = urlunsplit(chain(parsed[:3], ("", "")))
        redirect_path = f'{path}/{dis_package.filename}?redirect={stripped_url}'
        d_url = urljoin(self.base_content_url, redirect_path)
        return dis_package.filename, d_url, dis_package.digests.get("sha256", "")

    rfilter = get_remote_package_filter(remote)
    if not rfilter.filter_project(package):
        # Http404 is an exception: it has to be raised, not returned.
        raise Http404(f"{package} does not exist.")

    url = remote.get_remote_artifact_url(f'simple/{package}/')
    # Build a new list instead of appending in place: ``headers`` may be
    # unset, and the existing list should not be mutated.
    remote.headers = [*(remote.headers or []), {"Accept": ACCEPT_JSON_PREFERRED}]
    downloader = remote.get_downloader(url=url, max_retries=1)
    try:
        d = downloader.fetch()
    except Exception as exc:
        raise Http404(f"Could not find {package}.") from exc

    # Ignore parameters such as "; charset=..." and tolerate a missing header.
    content_type = d.headers.get("content-type", "").partition(";")[0].strip().lower()
    # Relative links on the page are resolved against the page's own URL.
    if content_type == "application/vnd.pypi.simple.v1+json":
        with open(d.path, "rb") as fp:
            page = ProjectPage.from_json_data(json.load(fp), base_url=url)
    else:
        with open(d.path, "rb") as fp:
            page = ProjectPage.from_html(package, fp.read(), base_url=url)

    packages = [
        parse_package(p)
        for p in page.packages
        if rfilter.filter_release(package, p.version)
    ]
    # NOTE(review): write_simple_detail renders HTML; confirm that Response
    # (rather than a plain HttpResponse) serves it with the intended type.
    return Response(write_simple_detail(package, packages))

@extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
def retrieve(self, request, path, package):
Expand Down
78 changes: 77 additions & 1 deletion pulp_python/app/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from django.conf import settings
from jinja2 import Template
from packaging.utils import canonicalize_name
from packaging.version import parse
from packaging.requirements import Requirement
from packaging.version import parse, InvalidVersion


PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
Expand Down Expand Up @@ -315,3 +316,78 @@ def write_simple_detail(project_name, project_packages, streamed=False):
detail = Template(simple_detail_template)
context = {"project_name": project_name, "project_packages": project_packages}
return detail.stream(**context) if streamed else detail.render(**context)


class PackageIncludeFilter:
    """Decide whether projects and releases pass a remote's include/exclude lists.

    Each entry of ``remote.includes`` / ``remote.excludes`` is parsed as a
    requirement string. Entries without a version specifier are stored under
    ``"full"`` (they concern the whole project); entries with a specifier are
    stored under ``"range"`` (they concern matching versions only). Both are
    keyed by the canonicalized project name.
    """

    def __init__(self, remote):
        """Parse and cache the include/exclude requirements of ``remote``."""
        self.remote = remote
        self._filter_includes = self._parse_packages(self.remote.includes)
        self._filter_excludes = self._parse_packages(self.remote.excludes)

    def _parse_packages(self, packages):
        """Group requirement strings into ``{"full"|"range": {name: [Requirement]}}``."""
        config = defaultdict(lambda: defaultdict(list))
        # Treat an unset list the same as an empty one.
        for value in packages or ():
            requirement = Requirement(value)
            requirement.name = canonicalize_name(requirement.name)
            if requirement.specifier:
                # Let pre-release versions match the specifier as well.
                requirement.specifier.prereleases = True
                config["range"][requirement.name].append(requirement)
            else:
                config["full"][requirement.name].append(requirement)
        return config

    def filter_project(self, project_name):
        """Return True if ``project_name`` is allowed through the remote's filters.

        A project is rejected when includes are configured and it is not named
        by any of them, or when it is excluded without a version specifier.
        """
        project_name = canonicalize_name(project_name)
        # ``.get`` avoids creating empty entries in the defaultdict.
        include_full = self._filter_includes.get("full", {})
        include_range = self._filter_includes.get("range", {})
        # Union of the project names; iterating a dict yields its keys.
        include = set(include_range) | set(include_full)
        if include and project_name not in include:
            return False

        exclude_full = self._filter_excludes.get("full", {})
        return project_name not in exclude_full

    def filter_release(self, project_name, version):
        """Return True if ``version`` of ``project_name`` is allowed through the filters.

        The project itself must pass ``filter_project``. A version that is
        missing or cannot be parsed is rejected. If the project has ranged
        includes, the version must match at least one of them; it must not
        match any ranged exclude.
        """
        project_name = canonicalize_name(project_name)
        if not self.filter_project(project_name):
            return False

        try:
            version = parse(version)
        except (InvalidVersion, TypeError):
            # TypeError covers a missing (None) version.
            return False

        include_range = self._filter_includes.get("range", {})
        if project_name in include_range and not any(
            version in req.specifier for req in include_range[project_name]
        ):
            return False

        exclude_range = self._filter_excludes.get("range", {})
        if project_name in exclude_range and any(
            version in req.specifier for req in exclude_range[project_name]
        ):
            return False

        return True


# Cache of filters keyed by remote id; each value is a
# (remote.pulp_last_updated, PackageIncludeFilter) pair.
_remote_filters = {}


def get_remote_package_filter(remote):
    """Return the include/exclude filter for ``remote``, reusing a cached one.

    A cached filter is reused only while the remote's ``pulp_last_updated``
    equals the value recorded when the filter was built; otherwise a new
    filter is created and replaces the cached entry.
    """
    cached = _remote_filters.get(remote.pulp_id)
    if cached and cached[0] == remote.pulp_last_updated:
        return cached[1]

    fresh = PackageIncludeFilter(remote)
    _remote_filters[remote.pulp_id] = (remote.pulp_last_updated, fresh)
    return fresh
Loading