Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve parsing of author information #521

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions src/poetry/core/masonry/builders/builder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import logging
import re
import sys
import warnings

Expand All @@ -14,8 +13,6 @@
from poetry.core.poetry import Poetry


AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,\w\d'’\"()]+) <(?P<email>.+?)>$")

METADATA_BASE = """\
Metadata-Version: 2.1
Name: {name}
Expand Down Expand Up @@ -344,12 +341,11 @@ def convert_script_files(self) -> list[Path]:

@classmethod
def convert_author(cls, author: str) -> dict[str, str]:
m = AUTHOR_REGEX.match(author)
if m is None:
raise RuntimeError(f"{author} does not match regex")
from poetry.core.utils.helpers import parse_author

name = m.group("name")
email = m.group("email")
name, email = parse_author(author)
if not name or not email:
raise RuntimeError(f"{author} does not match regex")

return {"name": name, "email": email}

Expand Down
17 changes: 7 additions & 10 deletions src/poetry/core/packages/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from poetry.core.packages.dependency_group import MAIN_GROUP
from poetry.core.packages.specification import PackageSpecification
from poetry.core.packages.utils.utils import create_nested_marker
from poetry.core.utils.helpers import parse_author
from poetry.core.version.exceptions import InvalidVersion
from poetry.core.version.markers import parse_marker

Expand All @@ -32,6 +33,8 @@

T = TypeVar("T", bound="Package")

# TODO: once poetry.console.commands.init.InitCommand._validate_author
# uses poetry.core.utils.helpers.parse_author, this can be removed.
AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,\w\d'’\"():&]+)(?: <(?P<email>.+?)>)?$")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you stack this on top of #517, please?



Expand Down Expand Up @@ -231,34 +234,28 @@ def _get_author(self) -> dict[str, str | None]:
if not self._authors:
return {"name": None, "email": None}

m = AUTHOR_REGEX.match(self._authors[0])
name, email = parse_author(self._authors[0])

if m is None:
if not name or not email:
raise ValueError(
"Invalid author string. Must be in the format: "
"John Smith <[email protected]>"
)

name = m.group("name")
email = m.group("email")

return {"name": name, "email": email}

def _get_maintainer(self) -> dict[str, str | None]:
if not self._maintainers:
return {"name": None, "email": None}

m = AUTHOR_REGEX.match(self._maintainers[0])
name, email = parse_author(self._maintainers[0])

if m is None:
if not name or not email:
raise ValueError(
"Invalid maintainer string. Must be in the format: "
"John Smith <[email protected]>"
)

name = m.group("name")
email = m.group("email")

return {"name": name, "email": email}

@property
Expand Down
24 changes: 24 additions & 0 deletions src/poetry/core/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import warnings

from contextlib import contextmanager
from email.utils import parseaddr
from pathlib import Path
from typing import Any
from typing import Iterator
Expand Down Expand Up @@ -105,3 +106,26 @@ def readme_content_type(path: str | Path) -> str:
return "text/markdown"
else:
return "text/plain"


def parse_author(address: str) -> tuple[str | None, str | None]:
    """Parse the name and email parts from an author string.

    >>> parse_author("John Doe <john.doe@example.com>")
    ('John Doe', 'john.doe@example.com')

    .. note::

        If the input string does not contain an ``@`` character, it is
        assumed that it represents only a name without an email address.

    :param address: the author string to parse, typically in the form
        ``Name <email>``.
    :return: a 2-tuple with the parsed name and email address. If a
        part is missing (or empty), ``None`` is returned in its place.
    """
    if "@" not in address:
        # Without an "@" the whole string is treated as the name. An empty
        # input has no name either, so normalise it to None as documented.
        return address or None, None

    name, email = parseaddr(address)

    if not name and "@" not in email:
        # parseaddr() found no display name and the "address" slot holds
        # something that is not an email, so treat that token as the name.
        # parseaddr() yields ('', '') when parsing fails; map that to None
        # rather than leaking an empty string to callers.
        return email or None, None

    return name or None, email or None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO we can remove all the if blocks.

58 changes: 58 additions & 0 deletions tests/utils/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pytest

from poetry.core.utils.helpers import combine_unicode
from poetry.core.utils.helpers import parse_author
from poetry.core.utils.helpers import parse_requires
from poetry.core.utils.helpers import readme_content_type
from poetry.core.utils.helpers import temporary_directory
Expand Down Expand Up @@ -118,3 +119,60 @@ def test_utils_helpers_readme_content_type(
readme: str | Path, content_type: str
) -> None:
assert readme_content_type(readme) == content_type


def test_utils_helpers_parse_author():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please turn this into a parametrized test.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done; see commit 8286618.

"""Test the :func:`parse_author` function."""

# Verify the (probable) default use case
name, email = parse_author("John Doe <[email protected]>")
assert name == "John Doe"
assert email == "[email protected]"

# Name only
name, email = parse_author("John Doe")
assert name == "John Doe"
assert email is None

# Name with a “special” character + email address
name, email = parse_author("R&D <[email protected]>")
assert name == "R&D"
assert email == "[email protected]"

# Name with a “special” character only
name, email = parse_author("R&D")
assert name == "R&D"
assert email is None

# Name with fancy unicode character + email address
name, email = parse_author("my·fancy corp <[email protected]>")
assert name == "my·fancy corp"
assert email == "[email protected]"

# Name with fancy unicode character only
name, email = parse_author("my·fancy corp")
assert name == "my·fancy corp"
assert email is None

# Email address only, wrapped in angle brackets
name, email = parse_author("<[email protected]>")
assert name is None
assert email == "[email protected]"

# Email address only
name, email = parse_author("[email protected]")
assert name is None
assert email == "[email protected]"

# Non-RFC-conform cases with unquoted commas
name, email = parse_author("asf,[email protected]")
assert name == "asf"
assert email is None

name, email = parse_author("asf,<[email protected]>")
assert name == "asf"
assert email is None

name, email = parse_author("asf, [email protected]")
assert name == "asf"
assert email is None