Skip to content

Commit

Permalink
Add community health file based metrics (#115)
Browse files Browse the repository at this point in the history
* restructure for community health metrics

* linting

* add file check function and data to report

* reset to main; add test for file metrics

* add code of conduct reference file

* add test for the almanack
  • Loading branch information
d33bs authored Oct 22, 2024
1 parent b19b475 commit aecce23
Show file tree
Hide file tree
Showing 11 changed files with 301 additions and 37 deletions.
3 changes: 3 additions & 0 deletions CODE_OF_CONDUCT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Code of Conduct

Please see our organization-wide [CODE_OF_CONDUCT.md](https://github.com/software-gardening/.github/blob/main/CODE_OF_CONDUCT.md) for more information.
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ requests = "^2.32.3"
kaleido = "0.2.1"
pygithub = "^2.3.0"
jupyterlab-spellchecker = "^0.8.4"
jsonschema = "^4.23.0"

[tool.poetry.scripts]
almanack = "almanack.reporting.cli:trigger"
Expand Down
77 changes: 66 additions & 11 deletions src/almanack/metrics/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,54 @@ def get_table(repo_path: str) -> Dict[str, Any]:
]


def file_exists_in_repo(
    repo: pygit2.Repository,
    expected_file_name: str,
    check_extension: bool = False,
    extensions: "list[str] | None" = None,
) -> bool:
    """
    Check if a file (case-insensitive and with optional extensions)
    exists in the latest commit of the repository.

    The entries of the tree belonging to the ``HEAD`` commit are iterated
    once; this function does not descend into any nested trees itself.

    Args:
        repo (pygit2.Repository):
            The repository object to search in.
        expected_file_name (str):
            The base file name to check (e.g., "readme").
        check_extension (bool):
            When True, an entry matches only if its full name equals
            ``expected_file_name`` followed by one of ``extensions``.
            When False, an entry matches if the part of its name before
            the first "." equals ``expected_file_name``.
        extensions (list[str] | None):
            List of possible file extensions to check (e.g., [".md", ""]).
            Only used when ``check_extension`` is True. ``None`` (the
            default) means ``[".md", ""]``.

    Returns:
        bool:
            True if the file exists, False otherwise.
    """

    # Resolve the default inside the body: a list literal in the signature
    # would be a single object shared by every call of this function.
    if extensions is None:
        extensions = [".md", ""]

    # Normalize expected file name to lowercase for case-insensitive comparison
    expected_file_name = expected_file_name.lower()

    # Lowercased names of the entries in the tree gathered from HEAD
    entry_names = (entry.name.lower() for entry in repo.revparse_single("HEAD").tree)

    if check_extension:
        # The acceptable full names do not depend on the entry being
        # inspected, so build them once instead of once per entry.
        candidates = {f"{expected_file_name}{ext.lower()}" for ext in extensions}
        return any(name in candidates for name in entry_names)

    # Without extension checking only the text before the first "." matters
    return any(name.partition(".")[0] == expected_file_name for name in entry_names)


def compute_repo_data(repo_path: str) -> None:
"""
Computes comprehensive data for a GitHub repository.
Expand All @@ -75,13 +123,7 @@ def compute_repo_data(repo_path: str) -> None:
repo_path (str): The local path to the Git repository.
Returns:
dict: A dictionary containing the following key-value pairs:
- "repo_path": The path of the repository.
- "total_normalized_entropy": The total normalized entropy calculated for the repository.
- "number_of_commits": The total number of commits in the repository.
- "number_of_files": The number of files that have been edited between the first and most recent commit.
- "time_range_of_commits": A tuple containing the dates of the first and most recent commits.
- "file_level_entropy": A dictionary of entropy values for each file.
dict: A dictionary containing data key-pairs.
"""
try:
# Convert repo_path to an absolute path and initialize the repository
Expand Down Expand Up @@ -122,10 +164,26 @@ def compute_repo_data(repo_path: str) -> None:
# Return the data structure
return {
"repo_path": str(repo_path),
"normalized_total_entropy": normalized_total_entropy,
"number_of_commits": len(commits),
"number_of_files": len(file_names),
"time_range_of_commits": (first_commit_date, most_recent_commit_date),
"readme-included": file_exists_in_repo(
repo=repo,
expected_file_name="readme",
),
"contributing-included": file_exists_in_repo(
repo=repo,
expected_file_name="contributing",
),
"code-of-conduct-included": file_exists_in_repo(
repo=repo,
expected_file_name="code_of_conduct",
),
"license-included": file_exists_in_repo(
repo=repo,
expected_file_name="license",
),
"normalized_total_entropy": normalized_total_entropy,
"file_level_entropy": file_entropy,
}

Expand All @@ -134,9 +192,6 @@ def compute_repo_data(repo_path: str) -> None:
return {"repo_path": str(repo_path), "error": str(e)}


from typing import Any, Dict


def compute_pr_data(repo_path: str, pr_branch: str, main_branch: str) -> Dict[str, Any]:
"""
Computes entropy data for a PR compared to the main branch.
Expand Down
28 changes: 28 additions & 0 deletions src/almanack/metrics/metrics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,34 @@ metrics:
result-data-key: "time_range_of_commits"
description: >-
Starting commit and most recent commit for the repository.
- name: "includes-readme"
id: "SGA-GL-0001"
result-type: "bool"
result-data-key: "readme-included"
description: >-
Boolean value indicating the presence of a README file
in the repository.
- name: "includes-contributing"
id: "SGA-GL-0002"
result-type: "bool"
result-data-key: "contributing-included"
description: >-
Boolean value indicating the presence of a CONTRIBUTING file
in the repository.
- name: "includes-code-of-conduct"
id: "SGA-GL-0003"
result-type: "bool"
result-data-key: "code-of-conduct-included"
description: >-
Boolean value indicating the presence of a CODE_OF_CONDUCT file
in the repository.
- name: "includes-license"
id: "SGA-GL-0004"
result-type: "bool"
result-data-key: "license-included"
description: >-
Boolean value indicating the presence of a LICENSE file
in the repository.
- name: "agg-info-entropy"
id: "SGA-VS-0001"
result-type: "float"
Expand Down
23 changes: 19 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@

import pytest

from tests.data.almanack.repo_setup.create_repo import create_repositories
from tests.data.almanack.repo_setup.create_repo import (
create_community_health_repository,
create_entropy_repositories,
)

from .utils import check_subproc_run_for_nonzero

Expand Down Expand Up @@ -67,16 +70,16 @@ def build_jupyter_book(


@pytest.fixture(scope="session")
def repository_paths(tmp_path_factory):
def entropy_repository_paths(tmp_path_factory):
"""
Fixture to call create_repositories, create the repositories, then delete them
Fixture to call create_entropy_repositories, create the repositories, then delete them
using the tmp_path_factory fixture to provide a temporary directory for tests.
"""
# Create a base temporary directory
base_path = tmp_path_factory.mktemp("almanack_entropy")

# Run create_repositories with the base_path argument
create_repositories(base_path)
create_entropy_repositories(base_path)

repositories = {
"3_file_repo": base_path / "3_file_repo",
Expand All @@ -99,3 +102,15 @@ def repo_file_sets():
"3_file_repo": ["file_1.md", "file_2.md", "file_3.md"],
"1_file_repo": ["file_1.md"],
}


@pytest.fixture(scope="session")
def community_health_repository_path(tmp_path_factory):
    """
    Session-scoped fixture which yields the value returned by
    create_community_health_repository when it is given a temporary
    base directory obtained from the tmp_path_factory fixture.
    """
    # Ask pytest for a dedicated temporary directory to build within
    health_base_dir = tmp_path_factory.mktemp("almanack_community_health")

    # Build the repository and hand its location to the requesting test
    repo_location = create_community_health_repository(health_base_dir)
    yield repo_location
47 changes: 46 additions & 1 deletion tests/data/almanack/repo_setup/create_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def commit_changes(repo_path: pathlib.Path, message: str) -> None:
repo.set_head("refs/heads/main")


def create_repositories(base_path: pathlib.Path) -> None:
def create_entropy_repositories(base_path: pathlib.Path) -> None:
"""
Sets up Git repositories with baseline content and adds entropy.
Expand Down Expand Up @@ -153,3 +153,48 @@ def create_repositories(base_path: pathlib.Path) -> None:
for repo_name in ["3_file_repo", "1_file_repo"]:
repo_path = base_path / repo_name
commit_changes(repo_path, "Commit with added lines of code")


def create_community_health_repository(base_path: pathlib.Path) -> str:
    """
    Create a Git repository holding stub community health files.

    A "community_health" directory is created beneath base_path and
    initialized as a non-bare repository. Stub README.md, CONTRIBUTING.md,
    CODE_OF_CONDUCT.md and LICENSE.txt files are written into it and
    recorded in a single parentless commit on refs/heads/main, which
    HEAD is then set to.

    Args:
        base_path (pathlib.Path):
            Directory under which the repository directory is created.

    Returns:
        str:
            The path of the created repository as a string.
    """
    # Prepare the working directory and initialize the repository in it
    repo_path = base_path / "community_health"
    repo_path.mkdir(parents=True, exist_ok=True)
    repo = pygit2.init_repository(path=str(repo_path), bare=False)

    # Set user.name and user.email in the config
    set_repo_user_config(repo)

    # Stub file names mapped to the text each file should contain
    stub_files = {
        "README.md": "# This is an example readme\n\nWelcome to our repo!",
        "CONTRIBUTING.md": "# This is a stub for a CONTRIBUTING.md",
        "CODE_OF_CONDUCT.md": "# This is a stub for a CODE_OF_CONDUCT.md",
        "LICENSE.txt": "This is an example LICENSE file.",
    }
    for stub_name, stub_text in stub_files.items():
        (repo_path / stub_name).resolve().write_text(stub_text)

    # Stage everything which was written and persist the index
    index = repo.index
    index.add_all()
    index.write()

    # Commit the staged tree with no parents, using the default
    # signature as both author and committer
    staged_tree = index.write_tree()
    signature = repo.default_signature
    repo.create_commit(
        "refs/heads/main",
        signature,
        signature,
        "Committing community health files",
        staged_tree,
        [],
    )

    # set the head to the main branch
    repo.set_head("refs/heads/main")

    return str(repo_path)
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@


def test_calculate_normalized_entropy(
repository_paths: dict[str, pathlib.Path], repo_file_sets: dict[str, list[str]]
entropy_repository_paths: dict[str, pathlib.Path],
repo_file_sets: dict[str, list[str]],
) -> None:
"""
Test the calculate_normalized_entropy function.
"""
for label, repo_path in repository_paths.items():
for label, repo_path in entropy_repository_paths.items():
# Extract two most recent commits: source and target
source_commit, target_commit = get_most_recent_commits(repo_path)

Expand All @@ -35,14 +36,15 @@ def test_calculate_normalized_entropy(


def test_calculate_aggregate_entropy(
repository_paths: dict[str, pathlib.Path], repo_file_sets: dict[str, list[str]]
entropy_repository_paths: dict[str, pathlib.Path],
repo_file_sets: dict[str, list[str]],
) -> None:
"""
Test that calculate_aggregate_entropy function
"""
repo_entropies = {}

for label, repo_path in repository_paths.items():
for label, repo_path in entropy_repository_paths.items():
# Extract two most recent commits: source and target
source_commit, target_commit = get_most_recent_commits(repo_path)
# Call calculate_normalized_entropy function
Expand Down
3 changes: 3 additions & 0 deletions tests/metrics/test_community_health.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""
Tests various community health metric functionality.
"""
Loading

0 comments on commit aecce23

Please sign in to comment.