From 663616f49dfa05dbb6c9a1657de70c3cfd5ef266 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 28 Jul 2023 13:25:55 -0400 Subject: [PATCH] More test coverage. --- dcicutils/contribution_utils.py | 129 ++++++++++++++++---------------- pyproject.toml | 2 +- test/test_contribution_utils.py | 73 +++++++++++++++++- 3 files changed, 138 insertions(+), 66 deletions(-) diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py index 036fc3d7a..41dd84a9f 100644 --- a/dcicutils/contribution_utils.py +++ b/dcicutils/contribution_utils.py @@ -198,6 +198,71 @@ def email_reference_time(self, email): def name_reference_time(self, name): return self.name_timestamps.get(name) + @classmethod + def contributor_values_as_dicts(cls, contributor_index: Optional[ContributorIndex]): + if contributor_index is None: + return None + else: + return { + key: contributor.as_dict() + for key, contributor in contributor_index.items() + } + + @classmethod + def contributor_values_as_objects(cls, contributor_index: Optional[Dict]): + if contributor_index is None: + return None + else: + return { + key: Contributor.from_dict(value, key=key) + for key, value in contributor_index.items() + } + + def checkpoint_state(self): + return self.as_dict() + + def as_dict(self): + data = { + "forked_at": self.forked_at.isoformat() if self.forked_at else None, + "excluded_fork": self.exclude_fork, + "pre_fork_contributors_by_name": self.contributor_values_as_dicts(self.pre_fork_contributors_by_name), + "contributors_by_name": self.contributor_values_as_dicts(self.contributors_by_name), + } + return data + + def save_contributor_data(self, filename: Optional[str] = None) -> str: + if filename is None: + filename = self.contributors_json_file() + with io.open(filename, 'w') as fp: + PRINT(json.dumps(self.as_dict(), indent=2), file=fp) + return filename + + def repo_contributor_names(self, with_email=False): + for name, contributor in self.contributors_by_name.items(): + if with_email: + yield f"{name} ({', '.join([self.pretty_email(email) for email in contributor.emails])})" + else: + yield name + + @classmethod + def pretty_email(cls, email): + m = GITHUB_USER_REGEXP.match(email) + if m: + user_name = m.group(1) + return f"{user_name}@github" + else: + return email + + @classmethod + def get_contributors_json_from_file_cache(cls, filename): + try: + with io.open(filename, 'r') as fp: + data = json.load(fp) + except Exception: + PRINT(f"Error while reading data from {filename!r}.") + raise + return data + class Contributions(BasicContributions): @@ -239,25 +304,6 @@ def list_to_dict_normalizer(*, label, item): if removed: cache_discrepancies['to_remove'] = removed - @classmethod - def pretty_email(cls, email): - m = GITHUB_USER_REGEXP.match(email) - if m: - user_name = m.group(1) - return f"{user_name}@github" - else: - return email - - @classmethod - def get_contributors_json_from_file_cache(cls, filename): - try: - with io.open(filename, 'r') as fp: - data = json.load(fp) - except Exception: - PRINT(f"Error while reading data from {filename!r}.") - raise - return data - def load_contributors_from_json_file_cache(self, filename): self.loaded_contributor_data = data = self.get_contributors_json_from_file_cache(filename) self.load_from_dict(data) @@ -421,38 +467,6 @@ def traverse(cls, cls.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email, contributors_by_name=contributors_by_name, seen=seen) - @classmethod - def contributor_values_as_dicts(cls, contributor_index: Optional[ContributorIndex]): - if contributor_index is None: - return None - else: - return { - key: contributor.as_dict() - for key, contributor in contributor_index.items() - } - - @classmethod - def contributor_values_as_objects(cls, contributor_index: Optional[Dict]): - if contributor_index is None: - return None - else: - return { - key: Contributor.from_dict(value, key=key) - for key, value in contributor_index.items() - } - - def checkpoint_state(self): - return self.as_dict() - - def as_dict(self): - data = { - "forked_at": self.forked_at.isoformat() if self.forked_at else None, - "excluded_fork": self.exclude_fork, - "pre_fork_contributors_by_name": self.contributor_values_as_dicts(self.pre_fork_contributors_by_name), - "contributors_by_name": self.contributor_values_as_dicts(self.contributors_by_name), - } - return data - def load_from_dict(self, data: Dict): forked_at: Optional[str] = data.get('forked_at') excluded_fork = data.get('excluded_fork') @@ -494,19 +508,6 @@ def by_email_from_by_name(cls, contributors_by_email_json): result[email] = entry return result - def save_contributor_data(self, filename: Optional[str] = None): - if filename is None: - filename = self.contributors_json_file() - with io.open(filename, 'w') as fp: - PRINT(json.dumps(self.as_dict(), indent=2), file=fp) - - def repo_contributor_names(self, with_email=False): - for name, contributor in self.contributors_by_name.items(): - if with_email: - yield f"{name} ({', '.join([self.pretty_email(email) for email in contributor.emails])})" - else: - yield name - def show_repo_contributors(self, analyze_discrepancies: bool = True, with_email: bool = True, error_class: Optional[Type[BaseException]] = None): for author_name in self.repo_contributor_names(with_email=with_email): diff --git a/pyproject.toml b/pyproject.toml index 9964b37dc..b8a5ec94f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "7.6.0.2b4" +version = "7.6.0.2b5" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py index b0d3ce155..fee2def01 100644 --- a/test/test_contribution_utils.py +++ b/test/test_contribution_utils.py @@ -9,7 +9,7 @@ from dcicutils import contribution_utils as contribution_utils_module from dcicutils.contribution_utils import Contributor, BasicContributions, Contributions, GitAnalysis -from dcicutils.misc_utils import make_counter # , override_environ +from dcicutils.misc_utils import make_counter, file_contents # , override_environ from dcicutils.qa_utils import MockId, MockFileSystem, printed_output from typing import List, Optional from unittest import mock @@ -557,3 +557,74 @@ def test_file_cache_error_reporting(): Contributions.get_contributors_json_from_file_cache(Contributions.CONTRIBUTORS_CACHE_FILE,) assert re.match("Expecting.*line 1 column 2.*", str(exc.value)) assert printed.lines == ["Error while reading data from 'CONTRIBUTORS.json'."] + + +def test_save_contributor_data(): + + mfs = MockFileSystem() + + with mfs.mock_exists_open_remove_abspath_getcwd_chdir(): + + contributions = BasicContributions() + cache_file = contributions.save_contributor_data() + + assert file_contents(cache_file) == ( + '{\n' + ' "forked_at": null,\n' + ' "excluded_fork": null,\n' + ' "pre_fork_contributors_by_name": null,\n' + ' "contributors_by_name": null\n' + '}\n' + ) + + cache_file_2 = contributions.save_contributor_data('some.file') + assert cache_file_2 == 'some.file' + assert file_contents(cache_file_2) == ( + '{\n' + ' "forked_at": null,\n' + ' "excluded_fork": null,\n' + ' "pre_fork_contributors_by_name": null,\n' + ' "contributors_by_name": null\n' + '}\n' + ) + + +def test_repo_contributor_names(): + + contributions = BasicContributions() + tony = Contributor(names={"Tony", "Anthony"}, emails={"tony@foo"}) + juan = Contributor(names={"Juan"}, emails={"juan@foo"}) + contributions.contributors_by_name = { + "Tony": tony, + "Juan": juan, + } + assert list(contributions.repo_contributor_names(with_email=False)) == [ + "Tony", + "Juan", + ] + assert list(contributions.repo_contributor_names(with_email=True)) == [ + "Tony (tony@foo)", + "Juan (juan@foo)", + ] + # We assume de-duplication has already occurred, so something like this will happen. + # This behavior is not a feature, but then again, there's no reason to check/redo what should be done earlier. + contributions.contributors_by_name = { + "Tony": tony, + "Juan": juan, + "Anthony": tony, + } + assert list(contributions.repo_contributor_names(with_email=True)) == [ + "Tony (tony@foo)", + "Juan (juan@foo)", + "Anthony (tony@foo)", + ] + # Note, too, that it's trusting the key because our save code puts the primary_key in the dictionary key, + # and if it's been edited by a human, we'll prefer that. + contributions.contributors_by_name = { + "Tony": tony, + "John": juan, + } + assert list(contributions.repo_contributor_names(with_email=True)) == [ + "Tony (tony@foo)", + "John (juan@foo)", + ]