From f1d29584999e77b3cc9434bc2a73833d489bdfe9 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 27 Jul 2023 14:16:48 -0400 Subject: [PATCH 01/17] Add contribution_utils. --- dcicutils/contribution_scripts.py | 39 +++ dcicutils/contribution_utils.py | 509 ++++++++++++++++++++++++++++++ dcicutils/qa_checkers.py | 9 + docs/source/dcicutils.rst | 14 + poetry.lock | 22 +- pyproject.toml | 5 +- test/test_contribution_utils.py | 288 +++++++++++++++++ test/test_misc.py | 7 +- 8 files changed, 879 insertions(+), 14 deletions(-) create mode 100644 dcicutils/contribution_scripts.py create mode 100644 dcicutils/contribution_utils.py create mode 100644 test/test_contribution_utils.py diff --git a/dcicutils/contribution_scripts.py b/dcicutils/contribution_scripts.py new file mode 100644 index 000000000..bc4091802 --- /dev/null +++ b/dcicutils/contribution_scripts.py @@ -0,0 +1,39 @@ +import argparse + +from dcicutils.command_utils import script_catch_errors, ScriptFailure +from .contribution_utils import Contributions, PROJECT_HOME + + +EPILOG = __doc__ + + +def show_contributors(repo, exclude_fork=None, verbose=False, save_contributors=False, test=False): + contributions = Contributions(repo=repo, exclude_fork=exclude_fork, verbose=verbose) + if save_contributors: + contributions.save_contributor_data() + contributions.show_repo_contributors(error_class=ScriptFailure if test else None) + + +def show_contributors_main(): + parser = argparse.ArgumentParser( # noqa - PyCharm wrongly thinks the formatter_class is specified wrong here. + description=(f"Show authors of a specified repository, which will be presumed" + f" to have been cloned as a subdirectory of $PROJECT_HOME ({PROJECT_HOME})"), + epilog=EPILOG, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument('repo', default=None, + help="name of repository to show contributors for") + parser.add_argument('--exclude', '-x', default=None, + help="name of repository that repo was forked from, whose contributors to exclude") + parser.add_argument('--save-contributors', '-s', action="store_true", default=False, + help="whether to store contributor data to CONTRIBUTORS.json") + parser.add_argument('--test', '-t', action="store_true", default=False, + help="whether to treat this as a test, erring if a cache update is needed") + parser.add_argument('--verbose', '-v', action="store_true", default=False, + help="whether to do verbose output while working") + args = parser.parse_args() + + with script_catch_errors(): + + show_contributors(repo=args.repo, exclude_fork=args.exclude, verbose=args.verbose, + save_contributors=args.save_contributors, test=args.test) diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py new file mode 100644 index 000000000..23f24ad59 --- /dev/null +++ b/dcicutils/contribution_utils.py @@ -0,0 +1,509 @@ +import datetime +import git +import io +import json +import os +import re + +from collections import defaultdict +from dcicutils.diff_utils import DiffManager +from dcicutils.lang_utils import n_of +from dcicutils.misc_utils import PRINT, ignored, environ_bool +from typing import Dict, List, Optional, Set, Type + + +DEBUG_CONTRIBUTIONS = environ_bool("DEBUG_CONTRIBUTIONS") + +GITHUB_USER_REGEXP = re.compile('(?:[0-9]+[+])?(.*)[@]users[.]noreply[.]github[.]com') +PROJECT_HOME = os.environ.get('PROJECT_HOME', os.path.dirname(os.path.abspath(os.curdir))) + + +class GitAnalysis: + + @classmethod + def find_repo(cls, repo_name: str) -> git.Repo: + repo_path = os.path.join(PROJECT_HOME, 
repo_name) + repo = git.Repo(repo_path) + return repo + + @classmethod + def git_commits(cls, repo_name) -> List[git.Commit]: + repo = cls.find_repo(repo_name) + commit: git.Commit + for commit in repo.iter_commits(): + yield cls.json_for_commit(commit) + + @classmethod + def json_for_actor(cls, actor: git.Actor) -> Dict: + return { + "name": actor.name, + "email": actor.email, + } + + @classmethod + def json_for_commit(cls, commit: git.Commit) -> Dict: + return { + 'commit': commit.hexsha, + 'date': commit.authored_datetime, + 'author': cls.json_for_actor(commit.author), + 'coauthors': [cls.json_for_actor(co_author) for co_author in commit.co_authors], + 'message': commit.message, + } + + +class Contributor: + + @classmethod + def create(cls, *, author: git.Actor) -> 'Contributor': + return Contributor(email=author.email, name=author.name) + + def __init__(self, *, email: Optional[str] = None, name: Optional[str] = None, + emails: Optional[Set[str]] = None, names: Optional[Set[str]] = None, + primary_name: Optional[str] = None): + # Both email and name are required keyword arguments, though name is allowed to be None, + # even though email is not. The primary_name is not required, and defaults to None, so will + # be heuristically computed based on available names. + if not email and not emails: + raise ValueError("One of email= or emails= is required when initializing a Contributor.") + if email and emails: + raise ValueError("Only one of email= and emails= may be provided.") + if not emails: + emails = {email} + if name and names: + raise ValueError("Only one of name= and names= may be provided.") + if name and not names: + names = {name} + self.emails: Set[str] = emails + self.names: Set[str] = names or set() + self._primary_name = primary_name + if primary_name: + self.names.add(primary_name) + + def __str__(self): + maybe_primary = f" {self._primary_name!r}" if self._primary_name else "" + emails = ",".join(sorted(map(repr, self.emails), key=lambda x: x.lower())) + names = ",".join(sorted(map(repr, self.names), key=lambda x: x.lower())) + return f"<{self.__class__.__name__}{maybe_primary} emails={emails} names={names} {id(self)}>" + + def copy(self): + return Contributor(emails=self.emails.copy(), names=self.names.copy(), primary_name=self._primary_name) + + @property + def primary_name(self): + if self._primary_name: + return self._primary_name + return sorted(self.names, key=self.name_variation_spelling_sort_key, reverse=True)[0] + + def set_primary_name(self, primary_name: str): + self.names.add(primary_name) + self._primary_name = primary_name + + def notice_mention_as(self, *, email: Optional[str] = None, name: Optional[str] = None): + if email is not None and email not in self.emails: + self.emails.add(email) + if name is not None and name not in self.names: + self.names.add(name) + + def as_dict(self): + # Note that the sort is case-sensitive alphabetic just because that's easiest here. + # Making it be case-insensitive would require a special case for non-strings, + # and all we really care about is some easy degree of determinism for testing. 
+ data = { + "emails": sorted(self.emails), + "names": sorted(self.names), + } + return data + + @classmethod + def from_dict(cls, data: Dict, *, key: Optional[str] = None) -> 'Contributor': + emails = data["emails"] + names = data["names"] + contributor = Contributor(email=emails[0], name=names[0]) + contributor.emails = set(emails) + contributor.names = set(names) + if key: + contributor.set_primary_name(key) + return contributor + + @classmethod + def name_variation_spelling_sort_key(cls, name): + return ( + ' ' in name, # we prefer names like 'jane doe' over jdoe123 + len(name), # longer names are usually more formal; william smith vs will smith + name, # this names by alphabetical order not because one is better, but to make sort results deterministic + ) + + +ContributorIndex = Optional[Dict[str, Contributor]] + + +class Contributions(GitAnalysis): + + VERBOSE = False + + def __init__(self, *, repo: Optional[str] = None, + exclude_fork: Optional[str] = None, + verbose: Optional[bool] = None): + if not repo: + # Doing it this way gets around an ambiguity about '/foo/' vs '/foo' since both + # os.path.join('/foo/', 'bar') and os.path.join('/foo', 'bar') yield '/foo/bar', + # and from there one can do os.path.basename(os.path.dirname(...)) to get 'foo' out. + cache_file = os.path.join(os.path.abspath(os.path.curdir), self.CONTRIBUTORS_CACHE_FILE) + dir = os.path.dirname(cache_file) + repo = os.path.basename(dir) + self.email_timestamps: Dict[str, datetime.datetime] = {} + self.name_timestamps: Dict[str, datetime.datetime] = {} + self.repo: str = repo + self.exclude_fork: Optional[str] = exclude_fork + self.excluded_contributions = None + self.forked_at: Optional[datetime.datetime] = None + self.contributors_by_name: Optional[ContributorIndex] = None + self.contributors_by_email: Optional[ContributorIndex] = None + self.pre_fork_contributors_by_email: Optional[ContributorIndex] = None + self.pre_fork_contributors_by_name: Optional[ContributorIndex] = None + self.verbose = self.VERBOSE if verbose is None else verbose + self.loaded_contributor_data = None + self.cache_discrepancies: Optional[dict] = None + existing_contributor_data_file = self.existing_contributors_json_file() + if existing_contributor_data_file: + # This will set .loaded_contributor_data and other values from CONTRIBUTORS.json + self.load_contributors_from_json_file_cache(existing_contributor_data_file) + + if exclude_fork and not self.excluded_contributions: + self.excluded_contributions = Contributions(repo=exclude_fork) + checkpoint1 = self.checkpoint_state() + self.reconcile_contributors_with_github_log() + checkpoint2 = self.checkpoint_state() + + def list_to_dict_normalizer(*, label, item): + ignored(label) + if isinstance(item, list): + return {elem: elem for elem in item} + else: + return item + + if existing_contributor_data_file: + diff_manager = DiffManager(label="contributors") + contributors1 = checkpoint1['contributors_by_name'] + contributors2 = checkpoint2['contributors_by_name'] + diffs = diff_manager.diffs(contributors1, contributors2, normalizer=list_to_dict_normalizer) + added = diffs.get('added') + changed = diffs.get('changed') + removed = diffs.get('removed') + self.cache_discrepancies = cache_discrepancies = {} + if added: + cache_discrepancies['to_add'] = added + if changed: + cache_discrepancies['to_change'] = changed + if removed: + cache_discrepancies['to_remove'] = removed + + @classmethod + def pretty_email(cls, email): + m = GITHUB_USER_REGEXP.match(email) + if m: + user_name = m.group(1) + 
return f"{user_name}@github" + else: + return email + + @classmethod + def notice_reference_time(cls, key: str, timestamp: datetime.datetime, timestamps: Dict[str, datetime.datetime]): + reference_timestamp: datetime.datetime = timestamps.get(key) + if not reference_timestamp: + timestamps[key] = timestamp + elif timestamp > reference_timestamp: + timestamps[key] = timestamp + + def email_reference_time(self, email): + return self.email_timestamps.get(email) + + def name_reference_time(self, name): + return self.name_timestamps.get(name) + + CONTRIBUTORS_CACHE_FILE = 'CONTRIBUTORS.json' + + def contributors_json_file(self): + """ + Returns the name of the CONTRIBUTORS.json file for the repo associated with this class. + """ + return os.path.join(PROJECT_HOME, self.repo, self.CONTRIBUTORS_CACHE_FILE) + + def existing_contributors_json_file(self): + """ + Returns the name of the CONTRIBUTORS.json file for the repo associated with this class if that file exists, + or None if there is no such file. + """ + file = self.contributors_json_file() + if os.path.exists(file): + return file + else: + return None + + def load_contributors_from_json_file_cache(self, filename): + try: + with io.open(filename, 'r') as fp: + data = json.load(fp) + except Exception: + PRINT(f"Error while reading data from {filename!r}.") + raise + self.loaded_contributor_data = data + self.load_from_dict(data) + + if DEBUG_CONTRIBUTIONS: # pragma: no cover - debugging only + PRINT("After load_contributors_from_json_file_cache...") + PRINT(f"{n_of(self.pre_fork_contributors_by_name, 'pre-fork contributor by name')}") + PRINT(f"{n_of(self.pre_fork_contributors_by_email, 'pre-fork contributor by email')}") + PRINT(f"{n_of(self.contributors_by_name, 'contributor by name')}") + PRINT(f"{n_of(self.contributors_by_email, 'contributor by email')}") + + def reconcile_contributors_with_github_log(self): + """ + Rummages the GitHub log entries for contributors we don't know about. + That data is merged against our existing structures. 
+ """ + if DEBUG_CONTRIBUTIONS: # pragma: no cover - debugging only + PRINT("Reconciling with git log.") + excluded_fork_contributors_by_email = (self.excluded_contributions.contributors_by_email + if self.excluded_contributions + else {}) + # excluded_fork_contributors_by_name = (self.excluded_contributions.contributors_by_name + # if self.excluded_contributions + # else {}) + fork_contributor_emails = set(excluded_fork_contributors_by_email.keys()) + + post_fork_contributors_seen = defaultdict(lambda: []) + + contributors_by_email: ContributorIndex = self.contributors_by_email or {} + git_repo = self.find_repo(repo_name=self.repo) + + def notice_author(*, author: git.Actor, date: datetime.datetime): + if self.forked_at: + if date < self.forked_at: + return + # raise Exception("Commits are out of order.") + elif author.email not in fork_contributor_emails: + PRINT(f"Forked {self.repo} at {date} by {author.email}") + self.forked_at = date + + if self.forked_at and date >= self.forked_at: + if author.email in fork_contributor_emails: + # PRINT(f"Post-fork contribution from {author.email} ({date})") + post_fork_contributors_seen[author.email].append(date) + self.notice_reference_time(key=author.email, timestamp=date, timestamps=self.email_timestamps) + self.notice_reference_time(key=author.name, timestamp=date, timestamps=self.name_timestamps) + + contributor_by_email = contributors_by_email.get(author.email) + if contributor_by_email: # already exists, so update it + contributor_by_email.notice_mention_as(email=author.email, name=author.name) + else: # need to create it new + contributor_by_email = Contributor.create(author=author) + contributors_by_email[author.email] = contributor_by_email + else: + # print("skipped") + pass + + n = 0 + for commit in reversed(list(git_repo.iter_commits())): + n += 1 + commit_date = commit.committed_datetime + notice_author(author=commit.author, date=commit_date) + for co_author in commit.co_authors: + notice_author(author=co_author, date=commit_date) + if DEBUG_CONTRIBUTIONS: + PRINT(f"{n_of(n, 'commit')} processed.") + + for email, dates in post_fork_contributors_seen.items(): + when = str(dates[0].date()) + if len(dates) > 1: + when += f" to {dates[-1].date()}" + PRINT(f"{n_of(dates, 'post-fork commit')} seen for {email} ({when}).") + + contributors_by_name: ContributorIndex = {} + + for contributor_by_email in contributors_by_email.values(): + self.traverse(root=contributor_by_email, + cursor=contributor_by_email, + contributors_by_email=contributors_by_email, + contributors_by_name=contributors_by_name) + for name in list(contributor_by_email.names): + contributors_by_name[name] = contributor_by_email + for email in list(contributor_by_email.emails): + contributors_by_email[email] = contributor_by_email + + # Note that the name table is somewhat unified, merging related cells, but the email table isn't. + # In part that's because I was lazy, but also we don't really need the email table to be so careful. + # It's the human names that really matter. 
+ # if not self.pre_fork_contributors_by_name: + # self.pre_fork_contributors_by_name = excluded_fork_contributors_by_name + + # if not self.pre_fork_contributors_by_email: + # self.pre_fork_contributors_by_email = excluded_fork_contributors_by_email + + self.contributors_by_name = self.contributor_index_by_primary_name(contributors_by_name) + self.contributors_by_email = contributors_by_email + + if DEBUG_CONTRIBUTIONS: # pragma: no cover - debugging only + PRINT("After reconcile_contributors_with_github_log...") + PRINT(f"{n_of(self.pre_fork_contributors_by_name, 'pre-fork contributor by name')}") + PRINT(f"{n_of(self.pre_fork_contributors_by_email, 'pre-fork contributor by email')}") + PRINT(f"{n_of(self.contributors_by_name, 'contributor by name')}") + PRINT(f"{n_of(self.contributors_by_email, 'contributor by email')}") + + @classmethod + def contributor_index_by_primary_name(cls, contributors_by_name: ContributorIndex) -> ContributorIndex: + """ + Given a by-name contributor index: + + * Makes sure that all contributors have only one name, indexed by the contributor's primary name + * Sorts the resulting index using a case-insensitive alphabetic sort + + and then returns the result. + + :param contributors_by_name: a contributor index indexed by human name + :return: a contributor index + """ + seen = set() + contributor_items = [] + contributors = {} + for name, contributor in contributors_by_name.items(): + if contributor not in seen: + for nickname in contributor.names: + if nickname in seen: + raise Exception(f"Name improperly shared between {contributor}" + f" and {contributors_by_name[nickname]}") + contributor_items.append((contributor.primary_name, contributor)) + seen.add(contributor) + for name, contributor in sorted(contributor_items, + # Having selected the longest names, now sort names ignoring case + key=lambda pair: pair[0].lower()): + contributors[name] = contributor + return contributors + + def traverse(self, + root: Contributor, + cursor: Contributor, + contributors_by_email: ContributorIndex, + contributors_by_name: ContributorIndex, + seen: Optional[Set[Contributor]] = None): + if seen is None: + seen = set() + if cursor in seen: + return + seen.add(cursor) + for name in list(cursor.names): + root.names.add(name) + for email in list(cursor.emails): + root.emails.add(email) + for name in list(cursor.names): + contributor = contributors_by_name.get(name) + if contributor and contributor not in seen: + self.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email, + contributors_by_name=contributors_by_name, seen=seen) + for email in list(cursor.emails): + contributor = contributors_by_email.get(email) + if contributor and contributor not in seen: + self.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email, + contributors_by_name=contributors_by_name, seen=seen) + + @classmethod + def contributor_values_as_dicts(cls, contributor_index: ContributorIndex): + if contributor_index is None: + return None + else: + return { + key: contributor.as_dict() + for key, contributor in contributor_index.items() + } + + @classmethod + def contributor_values_as_objects(cls, contributor_index: Dict): + if contributor_index is None: + return None + else: + return { + key: Contributor.from_dict(value, key=key) + for key, value in contributor_index.items() + } + + def checkpoint_state(self): + return self.as_dict() + + def as_dict(self): + data = { + "forked_at": self.forked_at.isoformat() if self.forked_at else None, + 
"excluded_fork": self.exclude_fork, + "pre_fork_contributors_by_name": self.contributor_values_as_dicts(self.pre_fork_contributors_by_name), + "contributors_by_name": self.contributor_values_as_dicts(self.contributors_by_name), + } + return data + + def load_from_dict(self, data: Dict): + forked_at: Optional[str] = data.get('forked_at') + excluded_fork = data.get('excluded_fork') + self.forked_at = None if forked_at is None else datetime.datetime.fromisoformat(forked_at) + self.exclude_fork = excluded_fork + + fork_contributors_by_name_json = data.get('pre_fork_contributors_by_name') or {} + fork_contributors_by_name = self.contributor_values_as_objects(fork_contributors_by_name_json) + self.pre_fork_contributors_by_name = fork_contributors_by_name + fork_contributors_by_email_json = data.get('pre_fork_contributors_by_email') or {} + if fork_contributors_by_email_json: + self.pre_fork_contributors_by_email = self.contributor_values_as_objects(fork_contributors_by_email_json) + else: + self.pre_fork_contributors_by_email = self.by_email_from_by_name(fork_contributors_by_name) + + contributors_by_name_json = data.get('contributors_by_name', {}) + self.contributors_by_name = contributors_by_name = self.contributor_values_as_objects(contributors_by_name_json) + contributors_by_email_json = data.get('contributors_by_email', {}) + if contributors_by_email_json: + self.contributors_by_email = self.contributor_values_as_objects(contributors_by_email_json) + else: + self.contributors_by_email = self.by_email_from_by_name(contributors_by_name) + + @classmethod + def by_email_from_by_name(cls, contributors_by_email_json): + result = {} + for email_key, entry in contributors_by_email_json.items(): + ignored(email_key) + for email in entry.get("emails", []) if isinstance(entry, dict) else entry.emails: + if result.get(email): + raise Exception(f"email address {email} is used more than once.") + result[email] = entry + return result + + def save_contributor_data(self, filename: Optional[str] = None): + if filename is None: + filename = self.contributors_json_file() + with io.open(filename, 'w') as fp: + PRINT(json.dumps(self.as_dict(), indent=2), file=fp) + + def repo_contributor_names(self, with_email=False): + for name, contributor in self.contributors_by_name.items(): + if with_email: + yield f"{name} ({', '.join([self.pretty_email(email) for email in contributor.emails])})" + else: + yield name + + def show_repo_contributors(self, analyze_discrepancies: bool = True, with_email: bool = True, + error_class: Optional[Type[BaseException]] = None): + for author_name in self.repo_contributor_names(with_email=with_email): + PRINT(author_name) + if analyze_discrepancies: + file = self.existing_contributors_json_file() + if not file: + message = f"Need to create a {self.CONTRIBUTORS_CACHE_FILE} file for {self.repo}." + if error_class: + raise error_class(message) + else: + PRINT(message) + elif self.cache_discrepancies: + message = "There are contributor cache discrepancies." 
+ PRINT(f"===== {message.rstrip('.').upper()} =====") + for action, items in self.cache_discrepancies.items(): + action: str + PRINT(f"{action.replace('_', ' ').title()}:") + for item in items: + PRINT(f" * {item}") + if error_class: + raise error_class(message) diff --git a/dcicutils/qa_checkers.py b/dcicutils/qa_checkers.py index 61a79cc77..291135bd1 100644 --- a/dcicutils/qa_checkers.py +++ b/dcicutils/qa_checkers.py @@ -8,6 +8,7 @@ from collections import defaultdict from typing import Optional, List, Dict, Type from typing_extensions import Literal +from .contribution_utils import Contributions from .lang_utils import conjoined_list, n_of, there_are from .misc_utils import PRINT, remove_prefix, remove_suffix, getattr_customized, CustomizableProperty @@ -436,3 +437,11 @@ def summarize(problems): detail += f"\n In {file}, {summarize(matches)}." message = f"{n_of(n, 'problem')} detected:" + detail raise AssertionError(message) + + +class ContributionsChecker: + + @classmethod + def validate(cls): + contributions = Contributions() # no repo specified, so use current directory + contributions.show_repo_contributors(error_class=AssertionError) diff --git a/docs/source/dcicutils.rst b/docs/source/dcicutils.rst index b18d0a952..7fdaba7ea 100644 --- a/docs/source/dcicutils.rst +++ b/docs/source/dcicutils.rst @@ -37,6 +37,20 @@ command_utils :members: +contribution_scripts +^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: dcicutils.contribution_scripts + :members: + + +contribution_utils +^^^^^^^^^^^^^^^^^^ + +.. automodule:: dcicutils.contribution_utils + :members: + + creds_utils ^^^^^^^^^^^ diff --git a/poetry.lock b/poetry.lock index da9155f6b..d428e79a8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -70,18 +70,18 @@ lxml = ["lxml"] [[package]] name = "boto3" -version = "1.26.101" +version = "1.28.11" description = "The AWS SDK for Python" category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "boto3-1.26.101-py3-none-any.whl", hash = "sha256:5f5279a63b359ba8889e9a81b319e745b14216608ffb5a39fcbf269d1af1ea83"}, - {file = "boto3-1.26.101.tar.gz", hash = "sha256:670ae4d1875a2162e11c6e941888817c3e9cf1bb9a3335b3588d805b7d24da31"}, + {file = "boto3-1.28.11-py3-none-any.whl", hash = "sha256:e24460d50001b517c6734dcf1c879feb43aa2062d88d9bdbb8703c986cb05941"}, + {file = "boto3-1.28.11.tar.gz", hash = "sha256:0fe7a35cf0041145c8eefebd3ae2ddf41baed62d7c963e5042b8ed8c297f648f"}, ] [package.dependencies] -botocore = ">=1.29.101,<1.30.0" +botocore = ">=1.31.11,<1.32.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -386,14 +386,14 @@ xray = ["mypy-boto3-xray (>=1.18.18)"] [[package]] name = "botocore" -version = "1.29.101" +version = "1.31.11" description = "Low-level, data-driven core of boto 3." 
category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "botocore-1.29.101-py3-none-any.whl", hash = "sha256:60c7a7bf8e2a288735e507007a6769be03dc24815f7dc5c7b59b12743f4a31cf"}, - {file = "botocore-1.29.101.tar.gz", hash = "sha256:7bb60d9d4c49500df55dfb6005c16002703333ff5f69dada565167c8d493dfd5"}, + {file = "botocore-1.31.11-py3-none-any.whl", hash = "sha256:d3cbffe554c9a1ba2ac6973734c43c21b8e7985a2ac4a4c31a09811b8029445c"}, + {file = "botocore-1.31.11.tar.gz", hash = "sha256:b17ff973bb70b02b227928c2abe4992f1cfc46d13aee0228516c8f32572b88c6"}, ] [package.dependencies] @@ -402,7 +402,7 @@ python-dateutil = ">=2.1,<3.0.0" urllib3 = ">=1.25.4,<1.27" [package.extras] -crt = ["awscrt (==0.16.9)"] +crt = ["awscrt (==0.16.26)"] [[package]] name = "botocore-stubs" @@ -820,14 +820,14 @@ smmap = ">=3.0.1,<6" [[package]] name = "gitpython" -version = "3.1.31" +version = "3.1.32" description = "GitPython is a Python library used to interact with Git repositories" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "GitPython-3.1.31-py3-none-any.whl", hash = "sha256:f04893614f6aa713a60cbbe1e6a97403ef633103cdd0ef5eb6efe0deb98dbe8d"}, - {file = "GitPython-3.1.31.tar.gz", hash = "sha256:8ce3bcf69adfdf7c7d503e78fd3b1c492af782d58893b650adb2ac8912ddd573"}, + {file = "GitPython-3.1.32-py3-none-any.whl", hash = "sha256:e3d59b1c2c6ebb9dfa7a184daf3b6dd4914237e7488a1730a6d8f6f5d0b4187f"}, + {file = "GitPython-3.1.32.tar.gz", hash = "sha256:8d9b8cb1e80b9735e8717c9362079d3ce4c6e5ddeebedd0361b228c3a67a62f6"}, ] [package.dependencies] diff --git a/pyproject.toml b/pyproject.toml index d79690d4f..4af381226 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "7.6.0.2b3" +version = "7.6.0.2b4" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" @@ -80,7 +80,7 @@ pytest-runner = ">=5.1" [tool.poetry.scripts] publish-to-pypi = "dcicutils.scripts.publish_to_pypi:main" - +show-contributors = "dcicutils.contribution_scripts:show_contributors_main" [tool.pytest.ini_options] addopts = "--basetemp=/tmp/pytest" @@ -89,6 +89,7 @@ filterwarnings = [ "ignore:distutils Version classes are deprecated. 
Use packaging.version instead.:DeprecationWarning", # This is a pip-licenses problem (still present in 3.5.5): "ignore:pkg_resources is deprecated as an API:DeprecationWarning", + "ignore:Boto3 will no longer support Python 3.7 starting December 13, 2023.*:", ] markers = [ "working: test should work", diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py new file mode 100644 index 000000000..c02ab8151 --- /dev/null +++ b/test/test_contribution_utils.py @@ -0,0 +1,288 @@ +import contextlib +import os + +from dcicutils import contribution_utils as contribution_utils_module +from dcicutils.contribution_utils import Contributor, Contributions, GitAnalysis +from dcicutils.misc_utils import make_counter # , override_environ +from dcicutils.qa_utils import MockId +from typing import List, Optional +from unittest import mock + + +class MockGitActor: + def __init__(self, name: Optional[str], email: str): + self.name = name + self.email = email + + +class MockGitCommit: + def __init__(self, hexsha: str, authored_datetime: str, author: dict, + message: str, co_authors: Optional[List[dict]] = None): + self.hexsha = hexsha + self.authored_datetime = authored_datetime + self.author = MockGitActor(**author) + self.co_authors = [MockGitActor(**co_author) for co_author in (co_authors or [])] + self.message = message + + +class MockGitRepo: + + def __init__(self, name, path, mocked_commits=None): + self.name = name + self.path = path + self.mocked_commits = mocked_commits or [] + + def iter_commits(self): + for json_data in self.mocked_commits: + yield MockGitCommit(**json_data) + + +class MockGitModule: + + def __init__(self, mocked_commits=None): + if mocked_commits is None: + mocked_commits = {} + self._mocked_commits = mocked_commits + + def Repo(self, path): + repo_name = os.path.basename(path) + return MockGitRepo(name=repo_name, path=path, mocked_commits=self._mocked_commits.get(repo_name, [])) + + +SAMPLE_USER_HOME = "/home/jdoe" +SAMPLE_PROJECT_HOME = f"{SAMPLE_USER_HOME}/repos" +SAMPLE_FOO_HOME = f"{SAMPLE_PROJECT_HOME}/foo" + + +@contextlib.contextmanager +def git_context(project_home=SAMPLE_PROJECT_HOME, mocked_commits=None): + with mock.patch.object(contribution_utils_module, "git", MockGitModule(mocked_commits=mocked_commits)): + with mock.patch.object(contribution_utils_module, "PROJECT_HOME", project_home): + yield + + +def test_git_analysis_find_repo(): + + with git_context(): + foo_repo = GitAnalysis.find_repo("foo") + assert isinstance(foo_repo, MockGitRepo) + assert foo_repo.name == 'foo' + assert foo_repo.path == SAMPLE_FOO_HOME + assert foo_repo.mocked_commits == [] + + +def test_git_analysis_git_commits(): + + with git_context(mocked_commits={"foo": [ + { + "hexsha": "aaaa", + "authored_datetime": "2020-01-01 01:23:45", + "author": {"name": "Jdoe", "email": "jdoe@foo"}, + "message": "something" + }, + { + "hexsha": "bbbb", + "authored_datetime": "2020-01-02 12:34:56", + "author": {"name": "Sally", "email": "ssmith@foo"}, + "message": "something else" + } + ]}): + foo_repo = GitAnalysis.find_repo("foo") + foo_commits = list(foo_repo.iter_commits()) + assert len(foo_commits) == 2 + assert all(isinstance(commit, MockGitCommit) for commit in foo_commits) + assert GitAnalysis.json_for_actor(foo_commits[0].author) == {'name': 'Jdoe', 'email': 'jdoe@foo'} + assert GitAnalysis.json_for_actor(foo_commits[1].author) == {'name': 'Sally', 'email': 'ssmith@foo'} + assert foo_commits[0].hexsha == 'aaaa' + assert foo_commits[1].hexsha == 'bbbb' + + +def 
test_git_analysis_iter_commits_scenario(): # This tests .iter_commits, .json_for_commit and .json_for_actor + + with git_context(mocked_commits={"foo": [ + { + "hexsha": "aaaa", + "authored_datetime": "2020-01-01 01:23:45", + "author": {"name": "Jdoe", "email": "jdoe@foo"}, + "message": "something" + }, + { + "hexsha": "bbbb", + "authored_datetime": "2020-01-02 12:34:56", + "author": {"name": "Sally", "email": "ssmith@foo"}, + "co_authors": [{"name": "William Simmons", "email": "bill@someplace"}], + "message": "something else" + } + ]}): + foo_repo = GitAnalysis.find_repo("foo") + assert [GitAnalysis.json_for_commit(commit) for commit in foo_repo.iter_commits()] == [ + { + 'author': {'email': 'jdoe@foo', 'name': 'Jdoe'}, + 'coauthors': [], + 'commit': 'aaaa', + 'date': '2020-01-01 01:23:45', + 'message': 'something' + }, + { + 'author': {'email': 'ssmith@foo', 'name': 'Sally'}, + 'coauthors': [{'email': 'bill@someplace', 'name': 'William Simmons'}], + 'commit': 'bbbb', + 'date': '2020-01-02 12:34:56', + 'message': 'something else' + } + ] + + +def test_contributor_str(): + + with mock.patch.object(contribution_utils_module, "id", MockId(1000)): + + john = Contributor(names={"John Doe", "jdoe"}, email="john@whatever") + + assert str(john) == "" + + jdoe = Contributor(names={"John Doe", "jdoe"}, primary_name="jdoe", email="john@whatever") + + assert str(jdoe) == "" + + +def test_contributor_set_primary_name(): + + john = Contributor(names={"John Doe", "jdoe"}, email="john@whatever") + assert john.primary_name == "John Doe" + + john.names.add("John Q Doe") + assert john.primary_name == "John Q Doe" + + john.set_primary_name("John Doe") + assert john.primary_name == "John Doe" + + john.names.add("John Quincy Doe") + assert john.primary_name == "John Doe" + + assert "Quincy" not in john.names + john.set_primary_name("Quincy") + assert john.primary_name == "Quincy" + assert "Quincy" in john.names + + +def test_contributor_copy(): + + john = Contributor(name="john", email="john@foo") + jack = john.copy() + + assert john.names == {"john"} + assert john.emails == {"john@foo"} + assert john.primary_name == "john" + + jack.names.add("jack") + jack.emails.add("jack@bar") + jack.set_primary_name("jack") + + assert john.names == {"john"} + assert john.emails == {"john@foo"} + assert john.primary_name == "john" + + assert jack.names == {"john", "jack"} + assert jack.emails == {"john@foo", "jack@bar"} + assert jack.primary_name == "jack" + + +def test_contributor_primary_name(): + + email_counter = make_counter() + + def make_contributor(names, primary_name=None): + some_email = f"user{email_counter()}@something.foo" # unique but we don't care about the specific value + return Contributor(names=set(names), email=some_email, primary_name=primary_name) + + assert make_contributor({"John Doe", "jdoe"}).primary_name == "John Doe" + assert make_contributor({"jdoe", "John Doe"}).primary_name == "John Doe" + assert make_contributor({"jdoe", "John Doe", "jdoe123456789"}).primary_name == "John Doe" + assert make_contributor({"jdoe123456789", "John Doe", "jdoe"}).primary_name == "John Doe" + + assert make_contributor({"jdoe123456789", "John Doe", "John Q Doe", "jdoe"}).primary_name == "John Q Doe" + assert make_contributor({"jdoe123456789", "John Q Doe", "John Doe", "jdoe"}).primary_name == "John Q Doe" + + assert make_contributor({"John Doe", "jdoe"}, primary_name="jdoe").primary_name == "jdoe" + assert make_contributor({"jdoe", "John Doe"}, primary_name="jdoe").primary_name == "jdoe" + assert 
make_contributor({"jdoe", "John Doe", "jdoe123456789"}, primary_name="jdoe").primary_name == "jdoe" + assert make_contributor({"jdoe123456789", "John Doe", "jdoe"}, primary_name="jdoe").primary_name == "jdoe" + + +def test_contributor_notice_mention_as(): + + john = Contributor(name="john", email="john@foo") + + assert john.names == {"john"} + assert john.emails == {"john@foo"} + + john.notice_mention_as(name="jack", email="john@foo") + + assert john.names == {"john", "jack"} + assert john.emails == {"john@foo"} + + john.notice_mention_as(name="john", email="jack@bar") + + assert john.names == {"john", "jack"} + assert john.emails == {"john@foo", "jack@bar"} + + john.notice_mention_as(name="john", email="john@foo") + + assert john.names == {"john", "jack"} + assert john.emails == {"john@foo", "jack@bar"} + + +def test_contributor_as_dict(): + + john = Contributor(name="john", email="john@foo") + + assert john.as_dict() == { + "names": ["john"], + "emails": ["john@foo"] + } + + john.notice_mention_as(name="jack", email="john@foo") + + assert john.as_dict() == { + "names": ["jack", "john"], + "emails": ["john@foo"] + } + + john.notice_mention_as(name="Richard", email="john@foo") + + assert john.as_dict() == { + # As it happens, our sort is case-sensitive. See notes in source, but that's just because it was easiest. + # This test is just to notice on a simple example if that changes, since bigger examples could be harder + # to debug. -kmp 27-Jul-2023 + "names": ["Richard", "jack", "john"], + "emails": ["john@foo"] + } + + +def test_contributor_index_by_primary_name(): + + john = Contributor(names={"John Doe", "jdoe"}, emails={"jdoe@a.foo"}) + jane = Contributor(names={"jsmith", "Jane Smith"}, emails={"jsmith@b.foo"}) + + idx1 = { + "John Doe": john, + "Jane Smith": jane, + "jdoe": john, + "jsmith": jane, + } + + assert list(Contributions.contributor_index_by_primary_name(idx1).items()) == [ + ("Jane Smith", jane), + ("John Doe", john), + ] + + idx2 = { + "jdoe": john, + "jsmith": jane, + } + + assert list(Contributions.contributor_index_by_primary_name(idx2).items()) == [ + ("Jane Smith", jane), + ("John Doe", john), + ] diff --git a/test/test_misc.py b/test/test_misc.py index f77253cc6..b790b8d6a 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -2,7 +2,7 @@ import pytest from dcicutils.license_utils import C4PythonInfrastructureLicenseChecker -from dcicutils.qa_checkers import DocsChecker, DebuggingArtifactChecker, ChangeLogChecker +from dcicutils.qa_checkers import DocsChecker, DebuggingArtifactChecker, ChangeLogChecker, ContributionsChecker from .conftest_settings import REPOSITORY_ROOT_DIR @@ -48,3 +48,8 @@ class MyChangeLogChecker(ChangeLogChecker): def test_license_compatibility(): C4PythonInfrastructureLicenseChecker.validate() + + +@pytest.mark.static +def test_contributions(): + ContributionsChecker.validate() From 01df2549af4a458f19b66642e6740d60f5ca4042 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 27 Jul 2023 14:25:44 -0400 Subject: [PATCH 02/17] Add a CONTRIBUTORS.json --- CONTRIBUTORS.json | 143 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 CONTRIBUTORS.json diff --git a/CONTRIBUTORS.json b/CONTRIBUTORS.json new file mode 100644 index 000000000..5c50c1db3 --- /dev/null +++ b/CONTRIBUTORS.json @@ -0,0 +1,143 @@ +{ + "forked_at": "2017-11-27T11:53:17-05:00", + "excluded_fork": null, + "pre_fork_contributors_by_name": null, + "contributors_by_name": { + "Alex Balashov": { + "emails": [ + 
"alexander_balashov@hms.harvard.edu" + ], + "names": [ + "Alex Balashov", + "Alexander Balashov" + ] + }, + "Alexander Veit": { + "emails": [ + "53857412+alexander-veit@users.noreply.github.com" + ], + "names": [ + "Alexander Veit" + ] + }, + "Andrea Cosolo": { + "emails": [ + "58441550+andreacosolo@users.noreply.github.com" + ], + "names": [ + "Andrea Cosolo" + ] + }, + "Andy Schroeder": { + "emails": [ + "andrew_schroeder@hms.harvard.edu" + ], + "names": [ + "Andy Schroeder", + "aschroed" + ] + }, + "Carl Vitzthum": { + "emails": [ + "carl.vitzthum@gmail.com", + "carl_vitzthum@hms.harvard.edu" + ], + "names": [ + "Carl Vitzthum" + ] + }, + "David Michaels": { + "emails": [ + "david_michaels@hms.harvard.edu", + "dmichaels@gmail.com", + "105234079+dmichaels-harvard@users.noreply.github.com" + ], + "names": [ + "David Michaels", + "dmichaels-harvard" + ] + }, + "Douglas Rioux": { + "emails": [ + "douglas_rioux@hms.harvard.edu", + "58236592+drio18@users.noreply.github.com" + ], + "names": [ + "Douglas Rioux", + "drio18" + ] + }, + "Eric Berg": { + "emails": [ + "Eric_Berg@hms.harvard.edu" + ], + "names": [ + "Eric Berg" + ] + }, + "Jeremy Johnson": { + "emails": [ + "jeremy@softworks.com.my" + ], + "names": [ + "Jeremy Johnson", + "j1z0" + ] + }, + "Kent M Pitman": { + "emails": [ + "netsettler@users.noreply.github.com", + "kent_pitman@hms.harvard.edu" + ], + "names": [ + "Kent M Pitman", + "Kent Pitman" + ] + }, + "Koray K\u0131rl\u0131": { + "emails": [ + "KorayKirli@users.noreply.github.com", + "koray_kirli@hms.harvard.edu" + ], + "names": [ + "Koray K\u0131rl\u0131" + ] + }, + "Luisa Mercado": { + "emails": [ + "42242797+Lmercadom@users.noreply.github.com" + ], + "names": [ + "Lmercadom", + "Luisa Mercado" + ] + }, + "Sarah Reiff": { + "emails": [ + "sarah_reiff@hms.harvard.edu" + ], + "names": [ + "Sarah", + "Sarah Reiff" + ] + }, + "Soo Lee": { + "emails": [ + "duplexa@gmail.com" + ], + "names": [ + "Soo Lee", + "SooLee" + ] + }, + "Will Ronchetti": { + "emails": [ + "wrr33@cornell.edu" + ], + "names": [ + "Will Ronchetti", + "William Ronchetti" + ] + } + } +} From 7c2e9ceebb19a67429a0bf83c7dff56cb3a8b905 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 27 Jul 2023 15:04:09 -0400 Subject: [PATCH 03/17] Add some coverage tests. --- dcicutils/contribution_utils.py | 2 +- test/test_contribution_utils.py | 26 +++++++++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py index 23f24ad59..c117fc837 100644 --- a/dcicutils/contribution_utils.py +++ b/dcicutils/contribution_utils.py @@ -64,7 +64,7 @@ def __init__(self, *, email: Optional[str] = None, name: Optional[str] = None, # even though email is not. The primary_name is not required, and defaults to None, so will # be heuristically computed based on available names. 
if not email and not emails: - raise ValueError("One of email= or emails= is required when initializing a Contributor.") + raise ValueError("One of email= or emails= is required.") if email and emails: raise ValueError("Only one of email= and emails= may be provided.") if not emails: diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py index c02ab8151..fdfbe51fd 100644 --- a/test/test_contribution_utils.py +++ b/test/test_contribution_utils.py @@ -1,5 +1,6 @@ import contextlib import os +import pytest from dcicutils import contribution_utils as contribution_utils_module from dcicutils.contribution_utils import Contributor, Contributions, GitAnalysis @@ -87,7 +88,7 @@ def test_git_analysis_git_commits(): "message": "something else" } ]}): - foo_repo = GitAnalysis.find_repo("foo") + foo_repo = GitAnalysis.find_repo('foo') foo_commits = list(foo_repo.iter_commits()) assert len(foo_commits) == 2 assert all(isinstance(commit, MockGitCommit) for commit in foo_commits) @@ -96,8 +97,10 @@ def test_git_analysis_git_commits(): assert foo_commits[0].hexsha == 'aaaa' assert foo_commits[1].hexsha == 'bbbb' + assert [GitAnalysis.json_for_commit(commit) for commit in foo_commits] == list(GitAnalysis.git_commits('foo')) + -def test_git_analysis_iter_commits_scenario(): # This tests .iter_commits, .json_for_commit and .json_for_actor +def test_git_analysis_iter_commits_scenario(): # Tests .iter_commits, .json_for_commit, .json_for_actor, .git_commits with git_context(mocked_commits={"foo": [ { @@ -115,7 +118,8 @@ def test_git_analysis_iter_commits_scenario(): # This tests .iter_commits, .jso } ]}): foo_repo = GitAnalysis.find_repo("foo") - assert [GitAnalysis.json_for_commit(commit) for commit in foo_repo.iter_commits()] == [ + foo_commits_as_json = [GitAnalysis.json_for_commit(commit) for commit in foo_repo.iter_commits()] + assert foo_commits_as_json == [ { 'author': {'email': 'jdoe@foo', 'name': 'Jdoe'}, 'coauthors': [], @@ -132,6 +136,8 @@ def test_git_analysis_iter_commits_scenario(): # This tests .iter_commits, .jso } ] + assert foo_commits_as_json == list(GitAnalysis.git_commits('foo')) + def test_contributor_str(): @@ -145,6 +151,20 @@ def test_contributor_str(): assert str(jdoe) == "" + # Edge cases... + + with pytest.raises(ValueError) as exc: + Contributor() + assert str(exc.value) == "One of email= or emails= is required." + + with pytest.raises(ValueError) as exc: + Contributor(email='foo@bar', emails={'foo@bar'}) + assert str(exc.value) == 'Only one of email= and emails= may be provided.' + + with pytest.raises(ValueError) as exc: + Contributor(name='John', names={'John'}, email="foo@bar") + assert str(exc.value) == 'Only one of name= and names= may be provided.' + def test_contributor_set_primary_name(): From f64cb6532de5e46b7f9ae9c70833cfa0a2f68ac0 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 27 Jul 2023 17:27:42 -0400 Subject: [PATCH 04/17] Additional test coverage. 
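This round of coverage also exercises Contributions.by_email_from_by_name, which now skips entries it has already seen (by object identity) so that one contributor record shared by several name keys is inverted only once. A minimal usage sketch with hypothetical values, mirroring the new test and assuming this branch of dcicutils is importable:

    from dcicutils.contribution_utils import Contributions

    # One shared entry referenced under two name keys.
    art = {"names": ["Art Jones", "ajones"], "emails": ["a@somewhere", "alpha@somewhere"]}
    by_name = {"Art Jones": art, "ajones": art}

    # Each email becomes a key exactly once; distinct entries that reuse an email raise an Exception.
    by_email = Contributions.by_email_from_by_name(by_name)
    assert by_email == {"a@somewhere": art, "alpha@somewhere": art}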
--- dcicutils/contribution_utils.py | 5 +++ test/test_contribution_utils.py | 68 +++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py index c117fc837..7e0a45e4f 100644 --- a/dcicutils/contribution_utils.py +++ b/dcicutils/contribution_utils.py @@ -464,8 +464,13 @@ def load_from_dict(self, data: Dict): @classmethod def by_email_from_by_name(cls, contributors_by_email_json): result = {} + seen = set() for email_key, entry in contributors_by_email_json.items(): ignored(email_key) + seen_key = id(entry) + if seen_key in seen: + continue + seen.add(seen_key) for email in entry.get("emails", []) if isinstance(entry, dict) else entry.emails: if result.get(email): raise Exception(f"email address {email} is used more than once.") diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py index fdfbe51fd..52ea7477e 100644 --- a/test/test_contribution_utils.py +++ b/test/test_contribution_utils.py @@ -306,3 +306,71 @@ def test_contributor_index_by_primary_name(): ("Jane Smith", jane), ("John Doe", john), ] + + +def test_contributions_by_name_from_by_email(): + + email_a = "a@somewhere" + email_alpha = "alpha@somewhere" + + email_b = "b@somewhere" + email_beta = "beta@somewhere" + + name_ajones = "ajones" + name_art = "Art Jones" + + name_bsmith = "bsmith" + name_betty = "Betty Smith" + + art_jones = Contributor(names={name_art, name_ajones}, emails={email_a, email_alpha}) + betty_smith = Contributor(names={name_betty, name_bsmith}, emails={email_b, email_beta}) + + # We should really just need one entry per each person + + by_name_contributors_short = { + name_art: art_jones, + name_betty: betty_smith, + } + + assert Contributions.by_email_from_by_name(by_name_contributors_short) == { + email_a: art_jones, + email_alpha: art_jones, + email_b: betty_smith, + email_beta: betty_smith, + } + + # It's OK for there to be more than one entry, as long as the entries share a single contributor object + # (or contributor json dictionary) + + by_name_contributors = { + name_ajones: art_jones, + name_art: art_jones, + name_bsmith: betty_smith, + name_betty: betty_smith, + } + + assert Contributions.by_email_from_by_name(by_name_contributors) == { + email_a: art_jones, + email_alpha: art_jones, + email_b: betty_smith, + email_beta: betty_smith, + } + + # It works for the targets of the dictionary to be either Contributors or JSON represented Contributors. + + art_jones_json = art_jones.as_dict() + betty_smith_json = betty_smith.as_dict() + + by_name_json = { + name_ajones: art_jones_json, + name_art: art_jones_json, + name_bsmith: betty_smith_json, + name_betty: betty_smith_json, + } + + assert Contributions.by_email_from_by_name(by_name_json) == { + email_a: art_jones_json, + email_alpha: art_jones_json, + email_b: betty_smith_json, + email_beta: betty_smith_json, + } From 7fa5ad40eeda1771f57ecfe3ec1766f27cedf1f0 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 27 Jul 2023 17:55:31 -0400 Subject: [PATCH 05/17] Test coverage. 
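Adds coverage for Contributor.create and for the from_dict/as_dict round trip, plus the duplicate-email failure path in by_email_from_by_name. A short illustrative sketch of the round trip being tested (hypothetical values; assumes this branch of dcicutils is importable):

    from dcicutils.contribution_utils import Contributor

    joe = Contributor.from_dict({"names": ["Joe"], "emails": ["joe@wherever"]})
    assert joe.as_dict() == {"emails": ["joe@wherever"], "names": ["Joe"]}

    # Passing key= installs that key as the primary name and adds it to .names.
    joseph = Contributor.from_dict({"names": ["Joe"], "emails": ["joe@wherever"]}, key="Joseph")
    assert joseph.primary_name == "Joseph" and "Joseph" in joseph.names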
--- test/test_contribution_utils.py | 41 +++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py index 52ea7477e..7d8b7703c 100644 --- a/test/test_contribution_utils.py +++ b/test/test_contribution_utils.py @@ -1,4 +1,5 @@ import contextlib +import git import os import pytest @@ -139,6 +140,18 @@ def test_git_analysis_iter_commits_scenario(): # Tests .iter_commits, .json_for assert foo_commits_as_json == list(GitAnalysis.git_commits('foo')) +def test_contributor_create(): + + # This should execute without error + c1 = Contributor(name="John Doe", email="john@whatever") + + with mock.patch.object(git, "Actor", MockGitActor): + + c2 = Contributor.create(author=git.Actor(name="John Doe", email="john@whatever")) + + assert c1.as_dict() == c2.as_dict() + + def test_contributor_str(): with mock.patch.object(contribution_utils_module, "id", MockId(1000)): @@ -230,6 +243,23 @@ def make_contributor(names, primary_name=None): assert make_contributor({"jdoe123456789", "John Doe", "jdoe"}, primary_name="jdoe").primary_name == "jdoe" +def test_contributor_from_dict(): + + joe_json = {'names': ["Joe"], "emails": ["joe@wherever"]} + + joe_obj = Contributor.from_dict(joe_json) + + assert isinstance(joe_obj, Contributor) + assert list(joe_obj.emails) == joe_json['emails'] + assert list(joe_obj.names) == joe_json['names'] + + assert joe_obj.as_dict() == joe_json + + joe_obj2 = Contributor.from_dict(joe_json, key="Joseph") + assert joe_obj.names != joe_obj2.names + assert joe_obj.names | {'Joseph'} == joe_obj2.names + + def test_contributor_notice_mention_as(): john = Contributor(name="john", email="john@foo") @@ -374,3 +404,14 @@ def test_contributions_by_name_from_by_email(): email_b: betty_smith_json, email_beta: betty_smith_json, } + + # You can't have conflicts between email addresses. That's supposed to have been resolved earlier by unification. + + by_name_json_with_dups = { + "Art Jones": {"names": ["Art Jones"], "emails": ["ajones@somewhere"]}, + "Arthur Jones": {"names": ["Arthur Jones"], "emails": ["ajones@somewhere"]} + } + + with pytest.raises(Exception) as exc: + Contributions.by_email_from_by_name(by_name_json_with_dups) + assert str(exc.value) == "email address ajones@somewhere is used more than once." From 8d52b69e416b8e5b339559a51ae8ae6e0cdbec6c Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 27 Jul 2023 20:48:19 -0400 Subject: [PATCH 06/17] Coverage for script file. --- dcicutils/contribution_scripts.py | 4 +- test/test_contribution_scripts.py | 63 +++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 test/test_contribution_scripts.py diff --git a/dcicutils/contribution_scripts.py b/dcicutils/contribution_scripts.py index bc4091802..99d818a8d 100644 --- a/dcicutils/contribution_scripts.py +++ b/dcicutils/contribution_scripts.py @@ -14,7 +14,7 @@ def show_contributors(repo, exclude_fork=None, verbose=False, save_contributors= contributions.show_repo_contributors(error_class=ScriptFailure if test else None) -def show_contributors_main(): +def show_contributors_main(*, simulated_args=None): parser = argparse.ArgumentParser( # noqa - PyCharm wrongly thinks the formatter_class is specified wrong here. 
description=(f"Show authors of a specified repository, which will be presumed" f" to have been cloned as a subdirectory of $PROJECT_HOME ({PROJECT_HOME})"), @@ -31,7 +31,7 @@ def show_contributors_main(): help="whether to treat this as a test, erring if a cache update is needed") parser.add_argument('--verbose', '-v', action="store_true", default=False, help="whether to do verbose output while working") - args = parser.parse_args() + args = parser.parse_args(args=simulated_args) with script_catch_errors(): diff --git a/test/test_contribution_scripts.py b/test/test_contribution_scripts.py new file mode 100644 index 000000000..8fa040c1e --- /dev/null +++ b/test/test_contribution_scripts.py @@ -0,0 +1,63 @@ +import pytest + +from dcicutils import contribution_scripts as contribution_scripts_module +from dcicutils.command_utils import ScriptFailure +from dcicutils.contribution_scripts import show_contributors, show_contributors_main +from unittest import mock + + +class MockContributions: + + def __init__(self, repo, exclude_fork=None, verbose=False): + self.repo = repo + self.exclude_fork = exclude_fork + self.verbose = verbose + + +def test_show_contributors(): + with mock.patch.object(contribution_scripts_module, "Contributions") as mock_contributions: + contributions_object = mock.MagicMock() + mock_contributions.return_value = contributions_object + + show_contributors(repo='my-repo') + + assert contributions_object.save_contributor_data.call_count == 0 + + mock_contributions.assert_called_once_with(repo='my-repo', exclude_fork=None, verbose=False) + contributions_object.show_repo_contributors.assert_called_once_with(error_class=None) + + mock_contributions.reset_mock() + contributions_object = mock.MagicMock() + mock_contributions.return_value = contributions_object + + show_contributors(repo='another-repo', exclude_fork='whatever', save_contributors=True, verbose=True, test=True) + + mock_contributions.assert_called_once_with(repo='another-repo', exclude_fork='whatever', verbose=True) + contributions_object.show_repo_contributors.assert_called_once_with(error_class=ScriptFailure) + contributions_object.save_contributor_data.assert_called_once_with() + + +def test_show_contributors_main(): + + with mock.patch.object(contribution_scripts_module, "show_contributors") as mock_show_contributors: + + with pytest.raises(SystemExit) as exc: + + show_contributors_main(simulated_args=['some-repo']) + + assert exc.value.code == 0 + + mock_show_contributors.assert_called_once_with(repo='some-repo', exclude_fork=None, verbose=False, + save_contributors=False, test=False) + + mock_show_contributors.reset_mock() + + with pytest.raises(SystemExit) as exc: + + show_contributors_main(simulated_args=['my-repo', '--exclude', 'their-repo', + '--save-contributors', '--test', '--verbose']) + + assert exc.value.code == 0 + + mock_show_contributors.assert_called_once_with(repo='my-repo', exclude_fork='their-repo', + save_contributors=True, test=True, verbose=True) From 95b6e3710dcd68104183753eff1d9c93fa8eb97b Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 00:27:30 -0400 Subject: [PATCH 07/17] Testing of qa_checkers. 
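Exercises ContributionsChecker.validate against a mocked git module and a pre-seeded CONTRIBUTORS.json cache, and switches commit JSON to use the ISO-formatted committed_datetime. For reference, this is how a consuming repository is expected to hook the checker into its own suite (a sketch mirroring the test_misc.py change earlier in this series, not part of this diff):

    import pytest
    from dcicutils.qa_checkers import ContributionsChecker

    @pytest.mark.static
    def test_contributions():
        # Validates the current repo's CONTRIBUTORS.json, raising AssertionError
        # if the cache file is missing or disagrees with the git log.
        ContributionsChecker.validate()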
--- dcicutils/contribution_utils.py | 12 ++++-- test/test_contribution_utils.py | 19 ++++++---- test/test_qa_checkers.py | 65 ++++++++++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 13 deletions(-) diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py index 7e0a45e4f..515ab5c21 100644 --- a/dcicutils/contribution_utils.py +++ b/dcicutils/contribution_utils.py @@ -44,7 +44,7 @@ def json_for_actor(cls, actor: git.Actor) -> Dict: def json_for_commit(cls, commit: git.Commit) -> Dict: return { 'commit': commit.hexsha, - 'date': commit.authored_datetime, + 'date': commit.committed_datetime.isoformat(), 'author': cls.json_for_actor(commit.author), 'coauthors': [cls.json_for_actor(co_author) for co_author in commit.co_authors], 'message': commit.message, @@ -227,7 +227,11 @@ def contributors_json_file(self): """ Returns the name of the CONTRIBUTORS.json file for the repo associated with this class. """ - return os.path.join(PROJECT_HOME, self.repo, self.CONTRIBUTORS_CACHE_FILE) + return self.contributors_json_file_for_repo(self.repo) + + @classmethod + def contributors_json_file_for_repo(cls, repo): + return os.path.join(PROJECT_HOME, repo, cls.CONTRIBUTORS_CACHE_FILE) def existing_contributors_json_file(self): """ @@ -441,7 +445,9 @@ def as_dict(self): def load_from_dict(self, data: Dict): forked_at: Optional[str] = data.get('forked_at') excluded_fork = data.get('excluded_fork') - self.forked_at = None if forked_at is None else datetime.datetime.fromisoformat(forked_at) + self.forked_at: Optional[datetime.datetime] = (None + if forked_at is None + else datetime.datetime.fromisoformat(forked_at)) self.exclude_fork = excluded_fork fork_contributors_by_name_json = data.get('pre_fork_contributors_by_name') or {} diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py index 7d8b7703c..47fd7b17f 100644 --- a/test/test_contribution_utils.py +++ b/test/test_contribution_utils.py @@ -1,4 +1,5 @@ import contextlib +import datetime import git import os import pytest @@ -18,10 +19,10 @@ def __init__(self, name: Optional[str], email: str): class MockGitCommit: - def __init__(self, hexsha: str, authored_datetime: str, author: dict, + def __init__(self, hexsha: str, committed_datetime: str, author: dict, message: str, co_authors: Optional[List[dict]] = None): self.hexsha = hexsha - self.authored_datetime = authored_datetime + self.committed_datetime = datetime.datetime.fromisoformat(committed_datetime) self.author = MockGitActor(**author) self.co_authors = [MockGitActor(**co_author) for co_author in (co_authors or [])] self.message = message @@ -50,6 +51,8 @@ def Repo(self, path): repo_name = os.path.basename(path) return MockGitRepo(name=repo_name, path=path, mocked_commits=self._mocked_commits.get(repo_name, [])) + Actor = MockGitActor + SAMPLE_USER_HOME = "/home/jdoe" SAMPLE_PROJECT_HOME = f"{SAMPLE_USER_HOME}/repos" @@ -78,13 +81,13 @@ def test_git_analysis_git_commits(): with git_context(mocked_commits={"foo": [ { "hexsha": "aaaa", - "authored_datetime": "2020-01-01 01:23:45", + "committed_datetime": "2020-01-01 01:23:45", "author": {"name": "Jdoe", "email": "jdoe@foo"}, "message": "something" }, { "hexsha": "bbbb", - "authored_datetime": "2020-01-02 12:34:56", + "committed_datetime": "2020-01-02 12:34:56", "author": {"name": "Sally", "email": "ssmith@foo"}, "message": "something else" } @@ -106,13 +109,13 @@ def test_git_analysis_iter_commits_scenario(): # Tests .iter_commits, .json_for with git_context(mocked_commits={"foo": [ { "hexsha": 
"aaaa", - "authored_datetime": "2020-01-01 01:23:45", + "committed_datetime": "2020-01-01T01:23:45-05:00", "author": {"name": "Jdoe", "email": "jdoe@foo"}, "message": "something" }, { "hexsha": "bbbb", - "authored_datetime": "2020-01-02 12:34:56", + "committed_datetime": "2020-01-02T12:34:56-05:00", "author": {"name": "Sally", "email": "ssmith@foo"}, "co_authors": [{"name": "William Simmons", "email": "bill@someplace"}], "message": "something else" @@ -125,14 +128,14 @@ def test_git_analysis_iter_commits_scenario(): # Tests .iter_commits, .json_for 'author': {'email': 'jdoe@foo', 'name': 'Jdoe'}, 'coauthors': [], 'commit': 'aaaa', - 'date': '2020-01-01 01:23:45', + 'date': '2020-01-01T01:23:45-05:00', 'message': 'something' }, { 'author': {'email': 'ssmith@foo', 'name': 'Sally'}, 'coauthors': [{'email': 'bill@someplace', 'name': 'William Simmons'}], 'commit': 'bbbb', - 'date': '2020-01-02 12:34:56', + 'date': '2020-01-02T12:34:56-05:00', 'message': 'something else' } ] diff --git a/test/test_qa_checkers.py b/test/test_qa_checkers.py index 9f0d0ca85..e158f8527 100644 --- a/test/test_qa_checkers.py +++ b/test/test_qa_checkers.py @@ -1,12 +1,18 @@ +import io +import json import os import pytest +from dcicutils import contribution_utils as contribution_utils_module +from dcicutils.contribution_utils import Contributions from dcicutils.misc_utils import lines_printed_to, remove_prefix from dcicutils.qa_checkers import ( DebuggingArtifactChecker, DocsChecker, ChangeLogChecker, VersionChecker, confirm_no_uses, find_uses, + ContributionsChecker ) from dcicutils.qa_utils import MockFileSystem, printed_output from unittest import mock +from .test_contribution_utils import git_context, SAMPLE_PROJECT_HOME from .conftest_settings import TEST_DIR @@ -252,7 +258,7 @@ def test_debugging_artifact_checker(): assert isinstance(exc.value, AssertionError) assert str(exc.value) == "1 problem detected:\n In foo/bar.py, 1 call to print." - assert printed.lines == [] # We might at some point print the actual problems, but we don't now. + assert printed.lines == [] # We might at some point print the actual problems, but we don't know. with lines_printed_to("foo/bar.py") as out: out('x = 1') @@ -262,4 +268,59 @@ def test_debugging_artifact_checker(): assert isinstance(exc.value, AssertionError) assert str(exc.value) == "1 problem detected:\n In foo/bar.py, 1 active use of pdb.set_trace." - assert printed.lines == [] # We might at some point print the actual problems, but we don't now. + assert printed.lines == [] # We might at some point print the actual problems, but we don't know. 
+ + +@mock.patch.object(contribution_utils_module, "PROJECT_HOME", SAMPLE_PROJECT_HOME) +def test_contribution_checker(): + + print() # start on a fresh line + some_repo_name = 'foo' + mfs = MockFileSystem() + with mfs.mock_exists_open_remove_abspath_getcwd_chdir(): + contributions_cache_file = Contributions.contributors_json_file_for_repo(some_repo_name) + print(f"contributions_cache_file={contributions_cache_file}") + os.chdir(os.path.join(SAMPLE_PROJECT_HOME, some_repo_name)) + print(f"working dir={os.getcwd()}") + with io.open(contributions_cache_file, 'w') as fp: + cache_data = { + "forked_at": "2015-01-01T12:34:56-05:00", + "excluded_fork": None, + "pre_fork_contributors_by_name": None, + "contributors_by_name": { + "John Smith": { + "emails": ["jsmith@somewhere"], + "names": ["John Smith"], + } + } + } + json.dump(cache_data, fp=fp) + mocked_commits = { + some_repo_name: [ + { + "hexsha": "aaaa", + "committed_datetime": "2016-01-01T01:23:45-05:00", + "author": {"name": "John Smith", "email": "jsmith@somewhere"}, + "message": "something" + }, + { + "hexsha": "bbbb", + "committed_datetime": "2017-01-02T12:34:56-05:00", + "author": {"name": "Sally", "email": "ssmith@elsewhere"}, + "message": "something else" + } + ] + } + with git_context(mocked_commits=mocked_commits): + with printed_output() as printed: + with pytest.raises(AssertionError) as exc: + ContributionsChecker.validate() + assert str(exc.value) == "There are contributor cache discrepancies." + assert printed.lines == [ + "John Smith (jsmith@somewhere)", + "Sally (ssmith@elsewhere)", + "===== THERE ARE CONTRIBUTOR CACHE DISCREPANCIES =====", + "To Add:", + " * contributors.Sally.emails.ssmith@elsewhere", + " * contributors.Sally.names.Sally", + ] From 06941873781f76b5feda5cb3d7290e249bd43a8e Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 03:41:50 -0400 Subject: [PATCH 08/17] More coverage --- dcicutils/contribution_utils.py | 23 ++++++++------ test/test_contribution_utils.py | 54 +++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py index 515ab5c21..3a8c9b211 100644 --- a/dcicutils/contribution_utils.py +++ b/dcicutils/contribution_utils.py @@ -368,14 +368,16 @@ def contributor_index_by_primary_name(cls, contributors_by_name: ContributorInde :return: a contributor index """ seen = set() + nicknames_seen = set() contributor_items = [] contributors = {} for name, contributor in contributors_by_name.items(): if contributor not in seen: for nickname in contributor.names: - if nickname in seen: + if nickname in nicknames_seen: raise Exception(f"Name improperly shared between {contributor}" f" and {contributors_by_name[nickname]}") + nicknames_seen.add(nickname) contributor_items.append((contributor.primary_name, contributor)) seen.add(contributor) for name, contributor in sorted(contributor_items, @@ -384,15 +386,16 @@ def contributor_index_by_primary_name(cls, contributors_by_name: ContributorInde contributors[name] = contributor return contributors - def traverse(self, + @classmethod + def traverse(cls, root: Contributor, - cursor: Contributor, + cursor: Optional[Contributor], contributors_by_email: ContributorIndex, contributors_by_name: ContributorIndex, seen: Optional[Set[Contributor]] = None): if seen is None: seen = set() - if cursor in seen: + if cursor in seen: # It's slightly possible that a person has a name of None that slipped in. Ignore that. 
return seen.add(cursor) for name in list(cursor.names): @@ -402,16 +405,16 @@ def traverse(self, for name in list(cursor.names): contributor = contributors_by_name.get(name) if contributor and contributor not in seen: - self.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email, - contributors_by_name=contributors_by_name, seen=seen) + cls.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email, + contributors_by_name=contributors_by_name, seen=seen) for email in list(cursor.emails): contributor = contributors_by_email.get(email) if contributor and contributor not in seen: - self.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email, - contributors_by_name=contributors_by_name, seen=seen) + cls.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email, + contributors_by_name=contributors_by_name, seen=seen) @classmethod - def contributor_values_as_dicts(cls, contributor_index: ContributorIndex): + def contributor_values_as_dicts(cls, contributor_index: Optional[ContributorIndex]): if contributor_index is None: return None else: @@ -421,7 +424,7 @@ def contributor_values_as_dicts(cls, contributor_index: ContributorIndex): } @classmethod - def contributor_values_as_objects(cls, contributor_index: Dict): + def contributor_values_as_objects(cls, contributor_index: Optional[Dict]): if contributor_index is None: return None else: diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py index 47fd7b17f..a070ec5c9 100644 --- a/test/test_contribution_utils.py +++ b/test/test_contribution_utils.py @@ -313,6 +313,33 @@ def test_contributor_as_dict(): } +def test_contributor_values_as_objects(): + + assert Contributions.contributor_values_as_objects(None) is None + + idx_json = {'jdoe': {'names': ['jdoe'], 'emails': ['jdoe@somewhere']}} + idx = Contributions.contributor_values_as_objects(idx_json) + jdoe = idx['jdoe'] + + assert isinstance(jdoe, Contributor) + assert jdoe.names == set(idx_json['jdoe']['names']) + assert jdoe.emails == set(idx_json['jdoe']['emails']) + + +def test_contributor_values_as_dicts(): + + assert Contributions.contributor_values_as_dicts(None) is None + + jdoe = Contributor(names={'jdoe'}, emails={'jdoe@somewhere'}) + idx = {'jdoe': jdoe} + idx_json = Contributions.contributor_values_as_dicts(idx) + jdoe_json = idx_json['jdoe'] + + assert isinstance(jdoe_json, dict) + assert set(jdoe_json['names']) == jdoe.names + assert set(jdoe_json['emails']) == jdoe.emails + + def test_contributor_index_by_primary_name(): john = Contributor(names={"John Doe", "jdoe"}, emails={"jdoe@a.foo"}) @@ -340,6 +367,18 @@ def test_contributor_index_by_primary_name(): ("John Doe", john), ] + idx3 = { + "John Doe": john, + "jdoe": john.copy(), # if the info is here twice, it must be in a shared pointer, not a separate object + "jsmith": jane, + } + + with pytest.raises(Exception) as exc: + # This should notice the improper duplication. + Contributions.contributor_index_by_primary_name(idx3) + + assert str(exc.value).startswith("Name improperly shared") + def test_contributions_by_name_from_by_email(): @@ -418,3 +457,18 @@ def test_contributions_by_name_from_by_email(): with pytest.raises(Exception) as exc: Contributions.by_email_from_by_name(by_name_json_with_dups) assert str(exc.value) == "email address ajones@somewhere is used more than once." 
+ + +def test_contributions_traverse_terminal_node(): + + mariela = Contributor(names={'mari', 'mariela'}, emails={'mariela@somewhere', 'mari@elsewhere'}) + mari = Contributor(names={'mari'}, emails={'mariela@somewhere', 'mari@elsewhere'}) + + seen = {mari, mariela} + originally_seen = seen.copy() + + Contributions.traverse(root=mariela, contributors_by_name={}, contributors_by_email={}, + # We're testing the cursor=None case + seen=seen, cursor=mari) + + assert seen == originally_seen From 2f715a4ca69fdff9300d302480f9780b3be7ea60 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 04:09:13 -0400 Subject: [PATCH 09/17] More test coverage. --- dcicutils/contribution_utils.py | 6 ++++- test/test_contribution_utils.py | 42 ++++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py index 3a8c9b211..52ff8bd76 100644 --- a/dcicutils/contribution_utils.py +++ b/dcicutils/contribution_utils.py @@ -238,7 +238,11 @@ def existing_contributors_json_file(self): Returns the name of the CONTRIBUTORS.json file for the repo associated with this class if that file exists, or None if there is no such file. """ - file = self.contributors_json_file() + self.existing_contributors_json_file_for_repo(self.repo) + + @classmethod + def existing_contributors_json_file_for_repo(cls, repo): + file = cls.contributors_json_file_for_repo(repo) if os.path.exists(file): return file else: diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py index a070ec5c9..2af70a3fe 100644 --- a/test/test_contribution_utils.py +++ b/test/test_contribution_utils.py @@ -1,13 +1,14 @@ import contextlib import datetime import git +import io import os import pytest from dcicutils import contribution_utils as contribution_utils_module from dcicutils.contribution_utils import Contributor, Contributions, GitAnalysis from dcicutils.misc_utils import make_counter # , override_environ -from dcicutils.qa_utils import MockId +from dcicutils.qa_utils import MockId, MockFileSystem from typing import List, Optional from unittest import mock @@ -472,3 +473,42 @@ def test_contributions_traverse_terminal_node(): seen=seen, cursor=mari) assert seen == originally_seen + + +def test_contributions_pretty_email(): + + assert Contributions.pretty_email('joe@foo.com') == 'joe@foo.com' + assert Contributions.pretty_email('joe@users.noreply.github.com') == 'joe@github' + assert Contributions.pretty_email('12345+joe@users.noreply.github.com') == 'joe@github' + + +def test_notice_reference_time(): + + timestamps = {} + timestamp0 = datetime.datetime(2020, 7, 1, 12, 0, 0) + timestamp1 = datetime.datetime(2020, 7, 1, 12, 0, 1) + timestamp2 = datetime.datetime(2020, 7, 1, 12, 0, 2) + key = 'joe' + + Contributions.notice_reference_time(key=key, timestamp=timestamp1, timestamps=timestamps) + assert timestamps == {key: timestamp1} + + Contributions.notice_reference_time(key=key, timestamp=timestamp0, timestamps=timestamps) + assert timestamps == {key: timestamp1} + + Contributions.notice_reference_time(key=key, timestamp=timestamp2, timestamps=timestamps) + assert timestamps == {key: timestamp2} + + +def test_existing_contributors_json_file_for_repo(): + + mfs = MockFileSystem() + + with mfs.mock_exists_open_remove(): + assert Contributions.existing_contributors_json_file_for_repo('foo') is None + + cache_file = Contributions.contributors_json_file_for_repo('foo') + with io.open(cache_file, 'w') as fp: + fp.write('something') + + 
assert Contributions.existing_contributors_json_file_for_repo('foo') == cache_file From c8b3e8d92f3b183e07dde1072c9f660ee68d0894 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 09:35:08 -0400 Subject: [PATCH 10/17] Refactor for easier testing. Add some test coverage. --- dcicutils/contribution_utils.py | 99 +++++++++++++++++---------------- test/test_contribution_utils.py | 58 +++++++++++++++++-- 2 files changed, 104 insertions(+), 53 deletions(-) diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py index 52ff8bd76..238da62c9 100644 --- a/dcicutils/contribution_utils.py +++ b/dcicutils/contribution_utils.py @@ -137,13 +137,16 @@ def name_variation_spelling_sort_key(cls, name): ContributorIndex = Optional[Dict[str, Contributor]] -class Contributions(GitAnalysis): +class BasicContributions(GitAnalysis): VERBOSE = False def __init__(self, *, repo: Optional[str] = None, exclude_fork: Optional[str] = None, verbose: Optional[bool] = None): + self.email_timestamps: Dict[str, datetime.datetime] = {} + self.name_timestamps: Dict[str, datetime.datetime] = {} + self.exclude_fork: Optional[str] = exclude_fork if not repo: # Doing it this way gets around an ambiguity about '/foo/' vs '/foo' since both # os.path.join('/foo/', 'bar') and os.path.join('/foo', 'bar') yield '/foo/bar', @@ -151,19 +154,58 @@ def __init__(self, *, repo: Optional[str] = None, cache_file = os.path.join(os.path.abspath(os.path.curdir), self.CONTRIBUTORS_CACHE_FILE) dir = os.path.dirname(cache_file) repo = os.path.basename(dir) - self.email_timestamps: Dict[str, datetime.datetime] = {} - self.name_timestamps: Dict[str, datetime.datetime] = {} self.repo: str = repo - self.exclude_fork: Optional[str] = exclude_fork self.excluded_contributions = None self.forked_at: Optional[datetime.datetime] = None self.contributors_by_name: Optional[ContributorIndex] = None self.contributors_by_email: Optional[ContributorIndex] = None self.pre_fork_contributors_by_email: Optional[ContributorIndex] = None self.pre_fork_contributors_by_name: Optional[ContributorIndex] = None - self.verbose = self.VERBOSE if verbose is None else verbose self.loaded_contributor_data = None self.cache_discrepancies: Optional[dict] = None + self.verbose = self.VERBOSE if verbose is None else verbose + + CONTRIBUTORS_CACHE_FILE = 'CONTRIBUTORS.json' + + def contributors_json_file(self): + """ + Returns the name of the CONTRIBUTORS.json file for the repo associated with this class. + """ + return os.path.join(PROJECT_HOME, self.repo, self.CONTRIBUTORS_CACHE_FILE) + + def existing_contributors_json_file(self): + """ + Returns the name of the CONTRIBUTORS.json file for the repo associated with this class if that file exists, + or None if there is no such file. 
+ """ + file = self.contributors_json_file() + if os.path.exists(file): + return file + else: + return None + + @classmethod + def notice_reference_time(cls, key: str, timestamp: datetime.datetime, timestamps: Dict[str, datetime.datetime]): + reference_timestamp: datetime.datetime = timestamps.get(key) + if not reference_timestamp: + timestamps[key] = timestamp + elif timestamp > reference_timestamp: + timestamps[key] = timestamp + + def email_reference_time(self, email): + return self.email_timestamps.get(email) + + def name_reference_time(self, name): + return self.name_timestamps.get(name) + + + +class Contributions(BasicContributions): + + def __init__(self, *, repo: Optional[str] = None, + exclude_fork: Optional[str] = None, + verbose: Optional[bool] = None): + super().__init__(repo=repo, exclude_fork=exclude_fork, verbose=verbose) existing_contributor_data_file = self.existing_contributors_json_file() if existing_contributor_data_file: # This will set .loaded_contributor_data and other values from CONTRIBUTORS.json @@ -208,54 +250,17 @@ def pretty_email(cls, email): return email @classmethod - def notice_reference_time(cls, key: str, timestamp: datetime.datetime, timestamps: Dict[str, datetime.datetime]): - reference_timestamp: datetime.datetime = timestamps.get(key) - if not reference_timestamp: - timestamps[key] = timestamp - elif timestamp > reference_timestamp: - timestamps[key] = timestamp - - def email_reference_time(self, email): - return self.email_timestamps.get(email) - - def name_reference_time(self, name): - return self.name_timestamps.get(name) - - CONTRIBUTORS_CACHE_FILE = 'CONTRIBUTORS.json' - - def contributors_json_file(self): - """ - Returns the name of the CONTRIBUTORS.json file for the repo associated with this class. - """ - return self.contributors_json_file_for_repo(self.repo) - - @classmethod - def contributors_json_file_for_repo(cls, repo): - return os.path.join(PROJECT_HOME, repo, cls.CONTRIBUTORS_CACHE_FILE) - - def existing_contributors_json_file(self): - """ - Returns the name of the CONTRIBUTORS.json file for the repo associated with this class if that file exists, - or None if there is no such file. 
- """ - self.existing_contributors_json_file_for_repo(self.repo) - - @classmethod - def existing_contributors_json_file_for_repo(cls, repo): - file = cls.contributors_json_file_for_repo(repo) - if os.path.exists(file): - return file - else: - return None - - def load_contributors_from_json_file_cache(self, filename): + def get_contributors_json_from_file_cache(cls, filename): try: with io.open(filename, 'r') as fp: data = json.load(fp) except Exception: PRINT(f"Error while reading data from {filename!r}.") raise - self.loaded_contributor_data = data + return data + + def load_contributors_from_json_file_cache(self, filename): + self.loaded_contributor_data = data = self.get_contributors_json_from_file_cache(filename) self.load_from_dict(data) if DEBUG_CONTRIBUTIONS: # pragma: no cover - debugging only diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py index 2af70a3fe..9f96148a4 100644 --- a/test/test_contribution_utils.py +++ b/test/test_contribution_utils.py @@ -2,13 +2,15 @@ import datetime import git import io +import json import os import pytest +import re from dcicutils import contribution_utils as contribution_utils_module -from dcicutils.contribution_utils import Contributor, Contributions, GitAnalysis +from dcicutils.contribution_utils import Contributor, BasicContributions, Contributions, GitAnalysis from dcicutils.misc_utils import make_counter # , override_environ -from dcicutils.qa_utils import MockId, MockFileSystem +from dcicutils.qa_utils import MockId, MockFileSystem, printed_output from typing import List, Optional from unittest import mock @@ -500,15 +502,59 @@ def test_notice_reference_time(): assert timestamps == {key: timestamp2} -def test_existing_contributors_json_file_for_repo(): +def test_existing_contributors_json_file(): mfs = MockFileSystem() + contributions = BasicContributions(repo='foo') # don't need all the git history, etc. 
loaded for this test + with mfs.mock_exists_open_remove(): - assert Contributions.existing_contributors_json_file_for_repo('foo') is None - cache_file = Contributions.contributors_json_file_for_repo('foo') + assert contributions.existing_contributors_json_file() is None + + cache_file = contributions.contributors_json_file() + print("cache_file=", cache_file) with io.open(cache_file, 'w') as fp: fp.write('something') - assert Contributions.existing_contributors_json_file_for_repo('foo') == cache_file + assert contributions.existing_contributors_json_file() == cache_file + + +def test_contributions_email_reference_time(): + + contributions = BasicContributions() + now = datetime.datetime.now() + then = now - datetime.timedelta(seconds=10) + contributions.email_timestamps = {'foo@somewhere': now, 'bar@elsewhere': then} + + assert contributions.email_reference_time('foo@somewhere') == now + assert contributions.email_reference_time('bar@elsewhere') == then + assert contributions.email_reference_time('baz@anywhere') is None + + +def test_contributions_name_reference_time(): + + contributions = BasicContributions() + now = datetime.datetime.now() + then = now - datetime.timedelta(seconds=10) + contributions.name_timestamps = {'foo': now, 'bar': then} + + assert contributions.name_reference_time('foo') == now + assert contributions.name_reference_time('bar') == then + assert contributions.name_reference_time('baz') is None + + +def test_file_cache_error_reporting(): + + mfs = MockFileSystem() + with mfs.mock_exists_open_remove(): + + with io.open(Contributions.CONTRIBUTORS_CACHE_FILE, 'w') as fp: + fp.write("{bad json}") + + with printed_output() as printed: + with pytest.raises(json.JSONDecodeError) as exc: + Contributions.get_contributors_json_from_file_cache(Contributions.CONTRIBUTORS_CACHE_FILE,) + assert re.match("Expecting.*line 1 column 2.*", str(exc.value)) + assert printed.lines == ["Error while reading data from 'CONTRIBUTORS.json'."] + From fde4598baea26b45e081eaf49e2a08e62e85c1aa Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 11:46:24 -0400 Subject: [PATCH 11/17] PEP8. Suppress a warning. --- dcicutils/contribution_utils.py | 1 - pyproject.toml | 2 ++ test/test_contribution_utils.py | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py index 238da62c9..036fc3d7a 100644 --- a/dcicutils/contribution_utils.py +++ b/dcicutils/contribution_utils.py @@ -199,7 +199,6 @@ def name_reference_time(self, name): return self.name_timestamps.get(name) - class Contributions(BasicContributions): def __init__(self, *, repo: Optional[str] = None, diff --git a/pyproject.toml b/pyproject.toml index 4af381226..9964b37dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,9 @@ show-contributors = "dcicutils.contribution_scripts:show_contributors_main" addopts = "--basetemp=/tmp/pytest" redis_exec = "/usr/local/bin/redis-server" filterwarnings = [ + # TODO: These next two are about use of the Version class: "ignore:distutils Version classes are deprecated. 
Use packaging.version instead.:DeprecationWarning", + "ignore:Setuptools is replacing distutils.:UserWarning", # This is a pip-licenses problem (still present in 3.5.5): "ignore:pkg_resources is deprecated as an API:DeprecationWarning", "ignore:Boto3 will no longer support Python 3.7 starting December 13, 2023.*:", diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py index 9f96148a4..b0d3ce155 100644 --- a/test/test_contribution_utils.py +++ b/test/test_contribution_utils.py @@ -557,4 +557,3 @@ def test_file_cache_error_reporting(): Contributions.get_contributors_json_from_file_cache(Contributions.CONTRIBUTORS_CACHE_FILE,) assert re.match("Expecting.*line 1 column 2.*", str(exc.value)) assert printed.lines == ["Error while reading data from 'CONTRIBUTORS.json'."] - From e1d03ad18660dcfaa1c2a00d97a0bed4a623121c Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 11:56:47 -0400 Subject: [PATCH 12/17] Fix a broken test. --- test/test_qa_checkers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_qa_checkers.py b/test/test_qa_checkers.py index e158f8527..9eb260814 100644 --- a/test/test_qa_checkers.py +++ b/test/test_qa_checkers.py @@ -4,7 +4,7 @@ import pytest from dcicutils import contribution_utils as contribution_utils_module -from dcicutils.contribution_utils import Contributions +from dcicutils.contribution_utils import BasicContributions, Contributions from dcicutils.misc_utils import lines_printed_to, remove_prefix from dcicutils.qa_checkers import ( DebuggingArtifactChecker, DocsChecker, ChangeLogChecker, VersionChecker, confirm_no_uses, find_uses, @@ -278,7 +278,7 @@ def test_contribution_checker(): some_repo_name = 'foo' mfs = MockFileSystem() with mfs.mock_exists_open_remove_abspath_getcwd_chdir(): - contributions_cache_file = Contributions.contributors_json_file_for_repo(some_repo_name) + contributions_cache_file = BasicContributions(repo=some_repo_name).contributors_json_file() print(f"contributions_cache_file={contributions_cache_file}") os.chdir(os.path.join(SAMPLE_PROJECT_HOME, some_repo_name)) print(f"working dir={os.getcwd()}") From ec028c285268da3107ff64ad4647e6d3a69e541b Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 12:03:43 -0400 Subject: [PATCH 13/17] PEP8 --- test/test_qa_checkers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_qa_checkers.py b/test/test_qa_checkers.py index 9eb260814..f1aa706c3 100644 --- a/test/test_qa_checkers.py +++ b/test/test_qa_checkers.py @@ -4,7 +4,7 @@ import pytest from dcicutils import contribution_utils as contribution_utils_module -from dcicutils.contribution_utils import BasicContributions, Contributions +from dcicutils.contribution_utils import BasicContributions from dcicutils.misc_utils import lines_printed_to, remove_prefix from dcicutils.qa_checkers import ( DebuggingArtifactChecker, DocsChecker, ChangeLogChecker, VersionChecker, confirm_no_uses, find_uses, @@ -278,6 +278,7 @@ def test_contribution_checker(): some_repo_name = 'foo' mfs = MockFileSystem() with mfs.mock_exists_open_remove_abspath_getcwd_chdir(): + # Predict which cache file we'll need, so we can make it ahead of time. 
contributions_cache_file = BasicContributions(repo=some_repo_name).contributors_json_file() print(f"contributions_cache_file={contributions_cache_file}") os.chdir(os.path.join(SAMPLE_PROJECT_HOME, some_repo_name)) From 663616f49dfa05dbb6c9a1657de70c3cfd5ef266 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 13:25:55 -0400 Subject: [PATCH 14/17] More test coverage. --- dcicutils/contribution_utils.py | 129 ++++++++++++++++---------------- pyproject.toml | 2 +- test/test_contribution_utils.py | 73 +++++++++++++++++- 3 files changed, 138 insertions(+), 66 deletions(-) diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py index 036fc3d7a..41dd84a9f 100644 --- a/dcicutils/contribution_utils.py +++ b/dcicutils/contribution_utils.py @@ -198,6 +198,71 @@ def email_reference_time(self, email): def name_reference_time(self, name): return self.name_timestamps.get(name) + @classmethod + def contributor_values_as_dicts(cls, contributor_index: Optional[ContributorIndex]): + if contributor_index is None: + return None + else: + return { + key: contributor.as_dict() + for key, contributor in contributor_index.items() + } + + @classmethod + def contributor_values_as_objects(cls, contributor_index: Optional[Dict]): + if contributor_index is None: + return None + else: + return { + key: Contributor.from_dict(value, key=key) + for key, value in contributor_index.items() + } + + def checkpoint_state(self): + return self.as_dict() + + def as_dict(self): + data = { + "forked_at": self.forked_at.isoformat() if self.forked_at else None, + "excluded_fork": self.exclude_fork, + "pre_fork_contributors_by_name": self.contributor_values_as_dicts(self.pre_fork_contributors_by_name), + "contributors_by_name": self.contributor_values_as_dicts(self.contributors_by_name), + } + return data + + def save_contributor_data(self, filename: Optional[str] = None) -> str: + if filename is None: + filename = self.contributors_json_file() + with io.open(filename, 'w') as fp: + PRINT(json.dumps(self.as_dict(), indent=2), file=fp) + return filename + + def repo_contributor_names(self, with_email=False): + for name, contributor in self.contributors_by_name.items(): + if with_email: + yield f"{name} ({', '.join([self.pretty_email(email) for email in contributor.emails])})" + else: + yield name + + @classmethod + def pretty_email(cls, email): + m = GITHUB_USER_REGEXP.match(email) + if m: + user_name = m.group(1) + return f"{user_name}@github" + else: + return email + + @classmethod + def get_contributors_json_from_file_cache(cls, filename): + try: + with io.open(filename, 'r') as fp: + data = json.load(fp) + except Exception: + PRINT(f"Error while reading data from {filename!r}.") + raise + return data + class Contributions(BasicContributions): @@ -239,25 +304,6 @@ def list_to_dict_normalizer(*, label, item): if removed: cache_discrepancies['to_remove'] = removed - @classmethod - def pretty_email(cls, email): - m = GITHUB_USER_REGEXP.match(email) - if m: - user_name = m.group(1) - return f"{user_name}@github" - else: - return email - - @classmethod - def get_contributors_json_from_file_cache(cls, filename): - try: - with io.open(filename, 'r') as fp: - data = json.load(fp) - except Exception: - PRINT(f"Error while reading data from {filename!r}.") - raise - return data - def load_contributors_from_json_file_cache(self, filename): self.loaded_contributor_data = data = self.get_contributors_json_from_file_cache(filename) self.load_from_dict(data) @@ -421,38 +467,6 @@ def traverse(cls, 
cls.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email, contributors_by_name=contributors_by_name, seen=seen) - @classmethod - def contributor_values_as_dicts(cls, contributor_index: Optional[ContributorIndex]): - if contributor_index is None: - return None - else: - return { - key: contributor.as_dict() - for key, contributor in contributor_index.items() - } - - @classmethod - def contributor_values_as_objects(cls, contributor_index: Optional[Dict]): - if contributor_index is None: - return None - else: - return { - key: Contributor.from_dict(value, key=key) - for key, value in contributor_index.items() - } - - def checkpoint_state(self): - return self.as_dict() - - def as_dict(self): - data = { - "forked_at": self.forked_at.isoformat() if self.forked_at else None, - "excluded_fork": self.exclude_fork, - "pre_fork_contributors_by_name": self.contributor_values_as_dicts(self.pre_fork_contributors_by_name), - "contributors_by_name": self.contributor_values_as_dicts(self.contributors_by_name), - } - return data - def load_from_dict(self, data: Dict): forked_at: Optional[str] = data.get('forked_at') excluded_fork = data.get('excluded_fork') @@ -494,19 +508,6 @@ def by_email_from_by_name(cls, contributors_by_email_json): result[email] = entry return result - def save_contributor_data(self, filename: Optional[str] = None): - if filename is None: - filename = self.contributors_json_file() - with io.open(filename, 'w') as fp: - PRINT(json.dumps(self.as_dict(), indent=2), file=fp) - - def repo_contributor_names(self, with_email=False): - for name, contributor in self.contributors_by_name.items(): - if with_email: - yield f"{name} ({', '.join([self.pretty_email(email) for email in contributor.emails])})" - else: - yield name - def show_repo_contributors(self, analyze_discrepancies: bool = True, with_email: bool = True, error_class: Optional[Type[BaseException]] = None): for author_name in self.repo_contributor_names(with_email=with_email): diff --git a/pyproject.toml b/pyproject.toml index 9964b37dc..b8a5ec94f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "7.6.0.2b4" +version = "7.6.0.2b5" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py index b0d3ce155..fee2def01 100644 --- a/test/test_contribution_utils.py +++ b/test/test_contribution_utils.py @@ -9,7 +9,7 @@ from dcicutils import contribution_utils as contribution_utils_module from dcicutils.contribution_utils import Contributor, BasicContributions, Contributions, GitAnalysis -from dcicutils.misc_utils import make_counter # , override_environ +from dcicutils.misc_utils import make_counter, file_contents # , override_environ from dcicutils.qa_utils import MockId, MockFileSystem, printed_output from typing import List, Optional from unittest import mock @@ -557,3 +557,74 @@ def test_file_cache_error_reporting(): Contributions.get_contributors_json_from_file_cache(Contributions.CONTRIBUTORS_CACHE_FILE,) assert re.match("Expecting.*line 1 column 2.*", str(exc.value)) assert printed.lines == ["Error while reading data from 'CONTRIBUTORS.json'."] + + +def test_save_contributor_data(): + + mfs = MockFileSystem() + + with mfs.mock_exists_open_remove_abspath_getcwd_chdir(): + + contributions = BasicContributions() + cache_file = contributions.save_contributor_data() + + assert 
file_contents(cache_file) == ( + '{\n' + ' "forked_at": null,\n' + ' "excluded_fork": null,\n' + ' "pre_fork_contributors_by_name": null,\n' + ' "contributors_by_name": null\n' + '}\n' + ) + + cache_file_2 = contributions.save_contributor_data('some.file') + assert cache_file_2 == 'some.file' + assert file_contents(cache_file_2) == ( + '{\n' + ' "forked_at": null,\n' + ' "excluded_fork": null,\n' + ' "pre_fork_contributors_by_name": null,\n' + ' "contributors_by_name": null\n' + '}\n' + ) + + +def test_repo_contributor_names(): + + contributions = BasicContributions() + tony = Contributor(names={"Tony", "Anthony"}, emails={"tony@foo"}) + juan = Contributor(names={"Juan"}, emails={"juan@foo"}) + contributions.contributors_by_name = { + "Tony": tony, + "Juan": juan, + } + assert list(contributions.repo_contributor_names(with_email=False)) == [ + "Tony", + "Juan", + ] + assert list(contributions.repo_contributor_names(with_email=True)) == [ + "Tony (tony@foo)", + "Juan (juan@foo)", + ] + # We assume de-duplication has already occurred, so something like this will happen. + # This behavior is not a feature, but then again, there's no reason to check/redo what should be done earlier. + contributions.contributors_by_name = { + "Tony": tony, + "Juan": juan, + "Anthony": tony, + } + assert list(contributions.repo_contributor_names(with_email=True)) == [ + "Tony (tony@foo)", + "Juan (juan@foo)", + "Anthony (tony@foo)", + ] + # Note, too, that it's trusting the key because our save code puts the primary_key in the dictionary key, + # and if it's been edited by a human, we'll prefer that. + contributions.contributors_by_name = { + "Tony": tony, + "John": juan, + } + assert list(contributions.repo_contributor_names(with_email=True)) == [ + "Tony (tony@foo)", + "John (juan@foo)", + ] From 902f5c699c9e5e8e9afc5716626664279bf07d51 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 14:46:14 -0400 Subject: [PATCH 15/17] Update to use python warnings system instead of logging in license_utils --- dcicutils/license_utils.py | 15 ++++++++------- pyproject.toml | 2 +- test/test_license_utils.py | 8 ++++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/dcicutils/license_utils.py b/dcicutils/license_utils.py index b2124a43d..a469001e7 100644 --- a/dcicutils/license_utils.py +++ b/dcicutils/license_utils.py @@ -2,10 +2,11 @@ import datetime import io import json -import logging +# import logging import os import re import subprocess +import warnings try: import piplicenses @@ -30,8 +31,8 @@ from dcicutils.misc_utils import PRINT, get_error_message, local_attrs -logging.basicConfig() -logger = logging.getLogger(__name__) +# logging.basicConfig() +# logger = logging.getLogger(__name__) _FRAMEWORK = 'framework' _LANGUAGE = 'language' @@ -270,7 +271,7 @@ def report(message): if analysis: analysis.miscellaneous.append(message) else: - logger.warning(message) + warnings.warn(message) parsed = cls.parse_simple_license_file(filename=filename) if check_license_title: license_title = parsed['license-title'] @@ -488,11 +489,11 @@ class MyOrgLicenseChecker(LicenseChecker): cls.analyze_license_file(analysis=analysis) cls.show_unacceptable_licenses(analysis=analysis) if analysis.unexpected_missing: - logger.warning(there_are(analysis.unexpected_missing, kind='unexpectedly missing license', punctuate=True)) + warnings.warn(there_are(analysis.unexpected_missing, kind='unexpectedly missing license', punctuate=True)) if analysis.no_longer_missing: - logger.warning(there_are(analysis.no_longer_missing, 
kind='no-longer-missing license', punctuate=True)) + warnings.warn(there_are(analysis.no_longer_missing, kind='no-longer-missing license', punctuate=True)) for message in analysis.miscellaneous: - logger.warning(message) + warnings.warn(message) if analysis.unacceptable: raise LicenseAcceptabilityCheckFailure(unacceptable_licenses=analysis.unacceptable) diff --git a/pyproject.toml b/pyproject.toml index b8a5ec94f..dedb44d99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "7.6.0.2b5" +version = "7.6.0.2b6" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/test/test_license_utils.py b/test/test_license_utils.py index ea7d7666f..4f1f881ed 100644 --- a/test/test_license_utils.py +++ b/test/test_license_utils.py @@ -11,7 +11,7 @@ LicenseFrameworkRegistry, LicenseFramework, PythonLicenseFramework, JavascriptLicenseFramework, LicenseAnalysis, LicenseChecker, LicenseStatus, LicenseFileParser, LicenseCheckFailure, LicenseOwnershipCheckFailure, LicenseAcceptabilityCheckFailure, - logger as license_logger, + warnings as license_utils_warnings_module, ) from dcicutils.misc_utils import ignored, file_contents from dcicutils.qa_utils import printed_output, MockFileSystem @@ -371,7 +371,7 @@ def check_license_checker_full_scenario_failing(*, perturb_setup, checker, file=fp) print(license_text_for_testing, file=fp) - with mock.patch.object(license_logger, 'warning') as mock_license_logger: + with mock.patch.object(license_utils_warnings_module, 'warn') as mock_license_logger: license_warnings = [] @@ -520,7 +520,7 @@ def test_license_checker_full_scenario_succeeding(): with mock.patch.object(PythonLicenseFramework, "get_dependencies") as mock_get_dependencies: - with mock.patch.object(license_logger, 'warning') as mock_license_logger: + with mock.patch.object(license_utils_warnings_module, 'warn') as mock_license_logger: license_warnings = [] @@ -655,7 +655,7 @@ def test_validate_simple_license_file(): with mfs.mock_exists_open_remove(): - with mock.patch.object(license_logger, 'warning') as mock_license_logger: + with mock.patch.object(license_utils_warnings_module, 'warn') as mock_license_logger: license_warnings = [] From 24276de33232741969d0460e920599bce60b9382 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 15:44:46 -0400 Subject: [PATCH 16/17] Some tweaks based on beta trials in snovault. --- dcicutils/license_utils.py | 10 ++++++++-- test/test_license_utils.py | 5 +++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dcicutils/license_utils.py b/dcicutils/license_utils.py index a469001e7..698379470 100644 --- a/dcicutils/license_utils.py +++ b/dcicutils/license_utils.py @@ -491,7 +491,8 @@ class MyOrgLicenseChecker(LicenseChecker): if analysis.unexpected_missing: warnings.warn(there_are(analysis.unexpected_missing, kind='unexpectedly missing license', punctuate=True)) if analysis.no_longer_missing: - warnings.warn(there_are(analysis.no_longer_missing, kind='no-longer-missing license', punctuate=True)) + # This is not so major as to need a warning, but it's still something that should show up somewhere. 
+ PRINT(there_are(analysis.no_longer_missing, kind='no-longer-missing license', punctuate=True)) for message in analysis.miscellaneous: warnings.warn(message) if analysis.unacceptable: @@ -703,6 +704,11 @@ class C4InfrastructureLicenseChecker(LicenseChecker): # Ref: https://github.com/getsentry/responses/blob/master/LICENSE 'responses', + # This seems to get flagged sometimes, but is not the pypi snovault library, it's what our dcicsnovault + # calls itself internally.. In any case, it's under MIT license and OK. + # Ref: https://github.com/4dn-dcic/snovault/blob/master/LICENSE.txt + 'snovault', + # PyPi identifies the supervisor library license as "BSD-derived (http://www.repoze.org/LICENSE.txt)" # Ref: https://pypi.org/project/supervisor/ # In fact, though, the license is a bit more complicated, though apparently still permissive. @@ -711,7 +717,7 @@ class C4InfrastructureLicenseChecker(LicenseChecker): # This seems to be a BSD-3-Clause-Modification license. # Ref: https://github.com/Pylons/translationstring/blob/master/LICENSE.txt - 'translationstring', + # 'translationstring', # This seems to be a BSD-3-Clause-Modification license. # Ref: https://github.com/Pylons/venusian/blob/master/LICENSE.txt diff --git a/test/test_license_utils.py b/test/test_license_utils.py index 4f1f881ed..670b5b0e5 100644 --- a/test/test_license_utils.py +++ b/test/test_license_utils.py @@ -326,7 +326,8 @@ def checker(printed, license_warnings): ' Bar License: library2, library8, libraryA, libraryB', ' Baz License: library4, library7, library9', ' Big-Org-Approved: libraryB', - ' Misc-Copyleft: libraryD'] + ' Misc-Copyleft: libraryD', + 'There is 1 no-longer-missing license: library1.'] javascript_failure = None for warning in license_warnings: @@ -342,7 +343,7 @@ def checker(printed, license_warnings): assert edited_license_warnings == [ "There is 1 unexpectedly missing license: library6.", - "There is 1 no-longer-missing license: library1.", + # "There is 1 no-longer-missing license: library1.", ] do_it() From 6b62ec8dc0dd2f01782ae960b0afca59d8876fa4 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 15:45:03 -0400 Subject: [PATCH 17/17] Bump beta version. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dedb44d99..2715c6ac4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "7.6.0.2b6" +version = "7.6.0.2b7" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT"