-
Notifications
You must be signed in to change notification settings - Fork 252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve parsing of author information #521
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
from poetry.core.packages.dependency_group import MAIN_GROUP | ||
from poetry.core.packages.specification import PackageSpecification | ||
from poetry.core.packages.utils.utils import create_nested_marker | ||
from poetry.core.utils.helpers import parse_author | ||
from poetry.core.version.exceptions import InvalidVersion | ||
from poetry.core.version.markers import parse_marker | ||
|
||
|
@@ -32,6 +33,8 @@ | |
|
||
T = TypeVar("T", bound="Package") | ||
|
||
# TODO: once poetry.console.commands.init.InitCommand._validate_author | ||
# uses poetry.core.utils.helpers.parse_author, this can be removed. | ||
AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,\w\d'’\"():&]+)(?: <(?P<email>.+?)>)?$") | ||
|
||
|
||
|
@@ -231,34 +234,28 @@ def _get_author(self) -> dict[str, str | None]: | |
if not self._authors: | ||
return {"name": None, "email": None} | ||
|
||
m = AUTHOR_REGEX.match(self._authors[0]) | ||
name, email = parse_author(self._authors[0]) | ||
|
||
if m is None: | ||
if not name or not email: | ||
raise ValueError( | ||
"Invalid author string. Must be in the format: " | ||
"John Smith <[email protected]>" | ||
) | ||
|
||
name = m.group("name") | ||
email = m.group("email") | ||
|
||
return {"name": name, "email": email} | ||
|
||
def _get_maintainer(self) -> dict[str, str | None]: | ||
if not self._maintainers: | ||
return {"name": None, "email": None} | ||
|
||
m = AUTHOR_REGEX.match(self._maintainers[0]) | ||
name, email = parse_author(self._maintainers[0]) | ||
|
||
if m is None: | ||
if not name or not email: | ||
raise ValueError( | ||
"Invalid maintainer string. Must be in the format: " | ||
"John Smith <[email protected]>" | ||
) | ||
|
||
name = m.group("name") | ||
email = m.group("email") | ||
|
||
return {"name": name, "email": email} | ||
|
||
@property | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
import warnings | ||
|
||
from contextlib import contextmanager | ||
from email.utils import parseaddr | ||
from pathlib import Path | ||
from typing import Any | ||
from typing import Iterator | ||
|
@@ -105,3 +106,26 @@ def readme_content_type(path: str | Path) -> str: | |
return "text/markdown" | ||
else: | ||
return "text/plain" | ||
|
||
|
||
def parse_author(address: str) -> tuple[str | None, str | None]:
    """Parse name and address parts from an email address string.

    >>> parse_author("John Doe <john.doe@example.com>")
    ('John Doe', 'john.doe@example.com')
    >>> parse_author("John Doe")
    ('John Doe', None)
    >>> parse_author("")
    (None, None)

    .. note::

        If the input string does not contain an ``@`` character, it is
        assumed that it represents only a name without an email address.

    :param address: the email address string to parse.
    :return: a 2-tuple with the parsed name and email address. If a
        part is missing, ``None`` will be returned in its place.
    """
    if "@" not in address:
        # No "@" at all: the whole input is taken as the name. An empty
        # input has no name either, so it is normalised to None to honour
        # the documented "missing part -> None" contract.
        return address or None, None
    name, email = parseaddr(address)
    if not name and "@" not in email:
        # parseaddr yielded no display name and an address part without
        # "@": treat that part as the name (None when it is empty).
        return email or None, None
    return name or None, email or None
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. IMO we can remove all the |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
import pytest | ||
|
||
from poetry.core.utils.helpers import combine_unicode | ||
from poetry.core.utils.helpers import parse_author | ||
from poetry.core.utils.helpers import parse_requires | ||
from poetry.core.utils.helpers import readme_content_type | ||
from poetry.core.utils.helpers import temporary_directory | ||
|
@@ -118,3 +119,60 @@ def test_utils_helpers_readme_content_type( | |
readme: str | Path, content_type: str | ||
) -> None: | ||
assert readme_content_type(readme) == content_type | ||
|
||
|
||
def test_utils_helpers_parse_author(): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. please make it into parametrized test There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done; see commit 8286618. |
||
"""Test the :func:`parse_author` function.""" | ||
|
||
# Verify the (probable) default use case | ||
name, email = parse_author("John Doe <[email protected]>") | ||
assert name == "John Doe" | ||
assert email == "[email protected]" | ||
|
||
# Name only | ||
name, email = parse_author("John Doe") | ||
assert name == "John Doe" | ||
assert email is None | ||
|
||
# Name with a “special” character + email address | ||
name, email = parse_author("R&D <[email protected]>") | ||
assert name == "R&D" | ||
assert email == "[email protected]" | ||
|
||
# Name with a “special” character only | ||
name, email = parse_author("R&D") | ||
assert name == "R&D" | ||
assert email is None | ||
|
||
# Name with fancy unicode character + email address | ||
name, email = parse_author("my·fancy corp <[email protected]>") | ||
assert name == "my·fancy corp" | ||
assert email == "[email protected]" | ||
|
||
# Name with fancy unicode character only | ||
name, email = parse_author("my·fancy corp") | ||
assert name == "my·fancy corp" | ||
assert email is None | ||
|
||
# Email address only, wrapped in angular brackets | ||
name, email = parse_author("<[email protected]>") | ||
assert name is None | ||
assert email == "[email protected]" | ||
|
||
# Email address only | ||
name, email = parse_author("[email protected]") | ||
assert name is None | ||
assert email == "[email protected]" | ||
|
||
# Non-RFC-conform cases with unquoted commas | ||
name, email = parse_author("asf,[email protected]") | ||
assert name == "asf" | ||
assert email is None | ||
|
||
name, email = parse_author("asf,<[email protected]>") | ||
assert name == "asf" | ||
assert email is None | ||
|
||
name, email = parse_author("asf, [email protected]") | ||
assert name == "asf" | ||
assert email is None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you stack this upon #517, please?