Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: update gitlab repo remote url #354

Merged
merged 9 commits into from
Jul 16, 2023
4 changes: 4 additions & 0 deletions src/macaron/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,7 @@ class ConfigurationError(MacaronError):

class CloneError(MacaronError):
    """Raised when a git repository cannot be cloned."""


class RepoCheckOutError(MacaronError):
    """Raised when checking out the requested revision of a git repository fails."""
34 changes: 29 additions & 5 deletions src/macaron/slsa_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
NoneDependencyAnalyzer,
)
from macaron.dependency_analyzer.cyclonedx import get_deps_from_sbom
from macaron.errors import CloneError
from macaron.errors import CloneError, RepoCheckOutError
from macaron.output_reporter.reporter import FileReporter
from macaron.output_reporter.results import Record, Report, SCMStatus
from macaron.slsa_analyzer import git_url
Expand Down Expand Up @@ -477,7 +477,7 @@ def _prepare_repo(
repo_path: str,
branch_name: str = "",
digest: str = "",
) -> Git:
) -> Git | None:
"""Prepare the target repository for analysis.

If ``repo_path`` is a remote path, the target repo is cloned to ``{target_dir}/{unique_path}``.
Expand All @@ -501,9 +501,10 @@ def _prepare_repo(

Returns
-------
Git
Git | None
The pydriller.Git object of the repository or None if error.
"""
# TODO: separate the logic for handling remote and local repos instead of putting them into this method.
behnazh-w marked this conversation as resolved.
Show resolved Hide resolved
# Cannot specify a commit hash without specifying the branch.
if not branch_name and digest:
logger.error(
Expand Down Expand Up @@ -560,8 +561,31 @@ def _prepare_repo(
logger.error("Cannot reset the target repository.")
return None

if not git_url.check_out_repo_target(git_obj, branch_name, digest, (not is_remote)):
logger.error("Cannot checkout the specific branch or commit of the target repo.")
# Checking out the specific branch or commit. This operation varies depending on the git service that the
# repository uses.
if not is_remote:
# If the repo path provided by the user is a local path, we need to get the actual origin remote URL of
# the repo to decide on the suitable git service.
origin_remote_url = git_url.get_remote_origin_of_local_repo(git_obj)
if git_url.is_remote_repo(origin_remote_url):
# The local repo's origin remote url is a remote URL (e.g. https://host.com/a/b). In this case, we obtain
# the corresponding git service using ``self.get_git_service``.
git_service = self.get_git_service(origin_remote_url)
else:
# The local repo's origin remote url is a local path (e.g /path/to/local/...). This happens when the
# target repository is a clone from another local repo or is a clone from a git archive -
# https://git-scm.com/docs/git-archive: In this case, we fall-back to the generic function
# ``git_url.check_out_repo_target``.
if not git_url.check_out_repo_target(git_obj, branch_name, digest, not is_remote):
logger.error("Cannot checkout the specific branch or commit of the target repo.")
return None

return git_obj

try:
git_service.check_out_repo(git_obj, branch_name, digest, not is_remote)
except RepoCheckOutError as error:
logger.error(error)
return None

return git_obj
Expand Down
48 changes: 45 additions & 3 deletions src/macaron/slsa_analyzer/git_service/base_git_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@

from abc import abstractmethod

from pydriller.git import Git

from macaron.config.defaults import defaults
from macaron.errors import CloneError, ConfigurationError
from macaron.errors import CloneError, ConfigurationError, RepoCheckOutError
from macaron.slsa_analyzer import git_url


Expand All @@ -27,7 +29,6 @@ def __init__(self, name: str) -> None:
@abstractmethod
def load_defaults(self) -> None:
    """Load the values for this git service from the ini configuration.

    This is an abstract hook: each concrete git service provides its own implementation.

    Raises
    ------
    NotImplementedError
        Always, if this base implementation is invoked directly.
    """
    raise NotImplementedError()

def load_domain(self, section_name: str) -> str | None:
"""Load the domain of the git service from the ini configuration section ``section_name``.
Expand Down Expand Up @@ -110,7 +111,32 @@ def clone_repo(self, clone_dir: str, url: str) -> None:
CloneError
If there is an error cloning the repo.
"""
raise NotImplementedError()

@abstractmethod
def check_out_repo(self, git_obj: Git, branch: str, digest: str, offline_mode: bool) -> Git:
    """Check out the branch and commit specified by the user in a repository.

    Parameters
    ----------
    git_obj : Git
        The Git object for the repository to check out.
    branch : str
        The branch to check out.
    digest : str
        The sha of the commit to check out.
    offline_mode : bool
        If true, no fetching is performed.

    Returns
    -------
    Git
        The same Git object from the input.

    Raises
    ------
    RepoCheckOutError
        If there is an error while checking out the specific branch or commit.
    """


class NoneGitService(BaseGitService):
Expand Down Expand Up @@ -154,3 +180,19 @@ def clone_repo(self, _clone_dir: str, url: str) -> None:
Always raise, since this method should not be used to clone any repository.
"""
raise CloneError(f"Internal error encountered when cloning the repo '{url}'.")

def check_out_repo(self, git_obj: Git, branch: str, digest: str, offline_mode: bool) -> Git:
    """Check out the branch and commit specified by the user in a repository.

    In this particular case, since this class represents a ``None`` git service,
    we do nothing but raise a ``RepoCheckOutError``.

    Parameters
    ----------
    git_obj : Git
        The Git object for the repository; only its ``project_name`` is used, for the error message.
    branch : str
        The requested branch; only used for the error message.
    digest : str
        The requested commit sha; only used for the error message.
    offline_mode : bool
        Unused by this implementation.

    Raises
    ------
    RepoCheckOutError
        Always raise, since this method should not be used to check out in any repository.
    """
    # Adjacent literals are concatenated at compile time; the resulting message is unchanged.
    raise RepoCheckOutError(
        f"Cannot check out branch {branch} and commit {digest} for repo {git_obj.project_name} "
        "from an empty git service"
    )
7 changes: 7 additions & 0 deletions src/macaron/slsa_analyzer/git_service/bitbucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

import logging

from pydriller.git import Git

from macaron.errors import RepoCheckOutError
from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService

logger: logging.Logger = logging.getLogger(__name__)
Expand All @@ -26,3 +29,7 @@ def clone_repo(self, _clone_dir: str, _url: str) -> None:
"""Clone a BitBucket repo."""
# TODO: implement this once support for BitBucket is added.
logger.info("Cloning BitBucket repositories is not supported yet. Please clone the repository manually.")

def check_out_repo(self, git_obj: Git, branch: str, digest: str, offline_mode: bool) -> Git:
    """Reject the check-out request, since this operation is not available for Bitbucket yet.

    None of the arguments are inspected.

    Raises
    ------
    RepoCheckOutError
        Always.
    """
    unsupported_msg = "Checking out a branch or commit on a Bitbucket repository is not supported yet."
    raise RepoCheckOutError(unsupported_msg)
35 changes: 34 additions & 1 deletion src/macaron/slsa_analyzer/git_service/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@

"""This module contains the spec for the GitHub service."""

from pydriller.git import Git

from macaron.config.global_config import global_config
from macaron.errors import ConfigurationError
from macaron.errors import ConfigurationError, RepoCheckOutError
from macaron.slsa_analyzer import git_url
from macaron.slsa_analyzer.git_service.api_client import GhAPIClient, get_default_gh_client
from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService
Expand Down Expand Up @@ -56,3 +58,34 @@ def clone_repo(self, clone_dir: str, url: str) -> None:
If there is an error cloning the repo.
"""
git_url.clone_remote_repo(clone_dir, url)

def check_out_repo(self, git_obj: Git, branch: str, digest: str, offline_mode: bool) -> Git:
    """Check out the branch and commit specified by the user in a repository.

    The work is delegated to ``git_url.check_out_repo_target``; a falsy result from it is
    turned into a ``RepoCheckOutError``.

    Parameters
    ----------
    git_obj : Git
        The Git object for the repository to check out.
    branch : str
        The branch to check out.
    digest : str
        The sha of the commit to check out.
    offline_mode : bool
        If true, no fetching is performed.

    Returns
    -------
    Git
        The same Git object from the input.

    Raises
    ------
    RepoCheckOutError
        If there is an error while checking out the specific branch and digest.
    """
    if not git_url.check_out_repo_target(git_obj, branch, digest, offline_mode):
        raise RepoCheckOutError(
            f"Failed to check out branch {branch} and commit {digest} for repo {git_obj.project_name}."
        )

    return git_obj
81 changes: 79 additions & 2 deletions src/macaron/slsa_analyzer/git_service/gitlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
from abc import abstractmethod
from urllib.parse import ParseResult, urlunparse

from macaron.errors import CloneError, ConfigurationError
from pydriller.git import Git

from macaron.errors import CloneError, ConfigurationError, RepoCheckOutError
from macaron.slsa_analyzer import git_url
from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService

Expand Down Expand Up @@ -103,6 +105,10 @@ def clone_repo(self, clone_dir: str, url: str) -> None:
To clone a GitLab repository with access token, we embed the access token in the https URL.
See GitLab documentation: https://docs.gitlab.com/ee/gitlab-basics/start-using-git.html#clone-using-a-token.

If we clone using the https URL with the token embedded, this URL will be stored as plain text in .git/config as
the origin remote URL. Therefore, after a repository is cloned, this remote origin URL will be set
with the value of the original ``url`` (which does not have the embedded token).

Parameters
----------
clone_dir: str
Expand All @@ -117,7 +123,78 @@ def clone_repo(self, clone_dir: str, url: str) -> None:
If there is an error cloning the repository.
"""
clone_url = self.construct_clone_url(url)
git_url.clone_remote_repo(clone_dir, clone_url)
# In the ``git_url.clone_remote_repo`` function, CloneError exception is raised whenever the repository
# has not been cloned or the clone attempts failed.
# In both cases, the repository would not be available on the file system to contain the token-included URL.
# Therefore, we don't need to catch and handle the CloneError exceptions here.
repo = git_url.clone_remote_repo(clone_dir, clone_url)
behnazh-w marked this conversation as resolved.
Show resolved Hide resolved

# If ``git_url.clone_remote_repo`` returns a Repo instance, this means that the repository is freshly cloned
# with the token embedded URL. We will set its value back to the original non-token URL.
# If ``git_url.clone_remote_repo`` returns None, it means that the repository already exists so we don't need
# to do anything.
if repo:
try:
origin_remote = repo.remote("origin")
except ValueError as error:
raise CloneError("Cannot find the remote origin for this repository.") from error

origin_remote.set_url(url)

def check_out_repo(self, git_obj: Git, branch: str, digest: str, offline_mode: bool) -> Git:
    """Check out the branch and commit specified by the user in a repository.

    For GitLab, this method sets the origin remote URL of the target repository to the token-embedded URL if
    a token is available before performing the checkout operation.

    After the checkout operation finishes, the origin remote URL is set back again to ensure that no
    token-embedded URL remains. The restore step runs even if the checkout operation raises an exception.

    Parameters
    ----------
    git_obj : Git
        The Git object for the repository to check out.
    branch : str
        The branch to check out.
    digest : str
        The sha of the commit to check out.
    offline_mode : bool
        If true, no fetching is performed.

    Returns
    -------
    Git
        The same Git object from the input.

    Raises
    ------
    RepoCheckOutError
        If the origin remote cannot be found, its URL cannot be parsed, or there is an error while
        checking out the specific branch and digest.
    """
    remote_origin_url = git_url.get_remote_origin_of_local_repo(git_obj)

    try:
        origin_remote = git_obj.repo.remote("origin")
    except ValueError as error:
        raise RepoCheckOutError("Cannot find the remote origin for this repository.") from error

    try:
        reconstructed_url = self.construct_clone_url(remote_origin_url)
    except CloneError as error:
        raise RepoCheckOutError("Cannot parse the remote origin URL of this repository.") from error

    origin_remote.set_url(reconstructed_url, remote_origin_url)
    try:
        check_out_status = git_url.check_out_repo_target(git_obj, branch, digest, offline_mode)
    finally:
        # Always put the original URL back, even when the checkout raises unexpectedly; otherwise the
        # token-embedded URL would stay behind as the origin remote URL of the repository.
        origin_remote.set_url(remote_origin_url, reconstructed_url)

    if not check_out_status:
        raise RepoCheckOutError(
            f"Failed to check out branch {branch} and commit {digest} for repo {git_obj.project_name}."
        )

    return git_obj


class SelfHostedGitLab(GitLab):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is a test file.
Loading
Loading