From eaf3aa07dc3f2d2bb321f77056899a039c04e716 Mon Sep 17 00:00:00 2001 From: Eric Brown Date: Wed, 31 Jan 2024 14:34:14 -0800 Subject: [PATCH] Allow using GitHub URLs as targets The CLI now accepts targets that start with https://github.com. When it encounters a GitHub URL it will: * Download a zipball of the repo's default branch * Extract the zip file into a temporary directory * Delete the zip file * Run analysis on the files in that temp directory Signed-off-by: Eric Brown --- precli/cli/main.py | 101 +++++++++++++++++++++++++++++++---- precli/core/location.py | 24 ++++++++- precli/renderers/__init__.py | 2 +- precli/renderers/detailed.py | 7 ++- precli/renderers/json.py | 9 +++- precli/renderers/plain.py | 10 +++- requirements.txt | 1 + 7 files changed, 136 insertions(+), 18 deletions(-) diff --git a/precli/cli/main.py b/precli/cli/main.py index 7f6c892c..15e17153 100644 --- a/precli/cli/main.py +++ b/precli/cli/main.py @@ -1,12 +1,17 @@ -# Copyright 2023 Secure Saurce LLC +# Copyright 2024 Secure Saurce LLC import argparse import io import logging import os import pathlib import sys +import tempfile import traceback +import zipfile +from urllib.parse import urljoin +from urllib.parse import urlparse +import requests from ignorelib import IgnoreFilterManager from rich import progress @@ -33,6 +38,7 @@ def _init_logger(log_level=logging.INFO): LOG.handlers = [] logging.captureWarnings(True) LOG.setLevel(log_level) + logging.getLogger("urllib3").setLevel(log_level) handler = logging.StreamHandler(sys.stderr) LOG.addHandler(handler) LOG.debug("logging initialized") @@ -124,30 +130,90 @@ def build_ignore_mgr(path: str, ignore_file: str) -> IgnoreFilterManager: ) +def get_owner_repo(repo_url: str): + # Extract owner and repository name from the URL + path = urlparse(repo_url).path.lstrip("/").split("/") + return path[0], path[1] + + +def get_default_branch(owner: str, repo: str): + api_url = f"https://api.github.com/repos/{owner}/{repo}" + response = 
requests.get(api_url, timeout=30) + response.raise_for_status() + return response.json().get("default_branch") + + +def extract_github_repo(owner: str, repo: str, branch: str): + base_url = "https://api.github.com/repos" + api_url = f"{base_url}/{owner}/{repo}/zipball/{branch}" + temp_dir = tempfile.mkdtemp() + zip_path = os.path.join(temp_dir, f"{repo}.zip") + + with requests.get(api_url, stream=True, timeout=30) as r: + r.raise_for_status() + with open(zip_path, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(temp_dir) + + os.remove(zip_path) + + for path in os.listdir(temp_dir): + if path.startswith(f"{owner}-{repo}-"): + temp_dir = os.path.join(temp_dir, path) + + return temp_dir + + +def file_to_url(owner, repo, branch, target, root, file): + target_len = len(target) + prefix = root[target_len:].lstrip("/") + urlpath = f"{owner}/{repo}/blob/{branch}" + rel_path = "/".join([urlpath, prefix, file]) + return urljoin("https://github.com", rel_path) + + def discover_files(targets: list[str], recursive: bool): file_list = [] + file_map = {} + + for target in targets: + if target.startswith("https://github.com"): + owner, repo = get_owner_repo(target) + if repo: + branch = get_default_branch(owner, repo) + target = extract_github_repo(owner, repo, branch) + else: + owner, repo = None, None - for fname in targets: - if os.path.isdir(fname): - gitignore_mgr = build_ignore_mgr(fname, ".gitignore") - preignore_mgr = build_ignore_mgr(fname, ".preignore") + if os.path.isdir(target): + gitignore_mgr = build_ignore_mgr(target, ".gitignore") + preignore_mgr = build_ignore_mgr(target, ".preignore") if recursive is True: for root, _, files in gitignore_mgr.walk(): for file in files: if not preignore_mgr.is_ignored(file): - file_list.append(os.path.join(root, file)) + path = os.path.join(root, file) + file_list.append(path) + if repo: + file_map[path] = file_to_url( + owner, repo, branch, target, root, file 
+ ) else: - files = os.listdir(path=fname) + files = os.listdir(path=target) for file in files: if not ( gitignore_mgr.is_ignored(file) or preignore_mgr.is_ignored(file) ): - file_list.append(os.path.join(fname, file)) + file_list.append(os.path.join(target, file)) else: - file_list.append(fname) - return file_list + file_list.append(target) + + return file_list, file_map def run_checks(parsers: dict, file_list: list[str]) -> list[Result]: @@ -258,10 +324,23 @@ def main(): parsers = loader.load_parsers(enabled, disabled) # Compile a list of the targets - file_list = discover_files(args.targets, args.recursive) + file_list, file_map = discover_files(args.targets, args.recursive) results, metrics = run_checks(parsers, file_list) + # Set the location url in the result if original target was URL based + for result in results: + net_loc = file_map.get(result.location.file_name) + if net_loc is not None: + if result.location.start_line != result.location.end_line: + lines = ( + f"L{result.location.start_line}-" + f"L{result.location.end_line}" + ) + else: + lines = f"L{result.location.start_line}" + result.location.url = f"{net_loc}#{lines}" + if args.json is True: json = Json(args.no_color) json.render(results, metrics) diff --git a/precli/core/location.py b/precli/core/location.py index 7b031ea3..8e23453e 100644 --- a/precli/core/location.py +++ b/precli/core/location.py @@ -1,4 +1,4 @@ -# Copyright 2023 Secure Saurce LLC +# Copyright 2024 Secure Saurce LLC from tree_sitter import Node @@ -6,6 +6,7 @@ class Location: def __init__( self, file_name: str = None, + url: str = None, node: Node = None, start_line: int = 0, end_line: int = -1, @@ -13,6 +14,7 @@ def __init__( end_column: int = -1, ): self._file_name = file_name + self._url = url if node is not None: self._start_line = node.start_point[0] + 1 self._start_column = node.start_point[1] @@ -35,6 +37,26 @@ def file_name(self) -> str: """ return self._file_name + @property + def url(self) -> str: + """ + If the 
original target was given as a URL, this + property will return that address. + + :return: URL + :rtype: str + """ + return self._url + + @url.setter + def url(self, url: str): + """ + Set the file location as a URL + + :param str url: file network location + """ + self._url = url + @property def start_line(self) -> int: """ diff --git a/precli/renderers/__init__.py b/precli/renderers/__init__.py index 9ec4e0fa..145c4900 100644 --- a/precli/renderers/__init__.py +++ b/precli/renderers/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 Secure Saurce LLC +# Copyright 2024 Secure Saurce LLC from abc import ABC from abc import abstractmethod diff --git a/precli/renderers/detailed.py b/precli/renderers/detailed.py index cae49041..ce18945e 100644 --- a/precli/renderers/detailed.py +++ b/precli/renderers/detailed.py @@ -36,9 +36,14 @@ def render(self, results: list[Result], metrics: Metrics): emoji = ":information-emoji: " style = "blue" + if result.location.url is not None: + file_name = result.location.url + else: + file_name = result.location.file_name + self.console.print( f"{emoji} {result.level.name.title()} on line " - f"{result.location.start_line} in {result.location.file_name}", + f"{result.location.start_line} in {file_name}", style=style, markup=False, ) diff --git a/precli/renderers/json.py b/precli/renderers/json.py index 3ea3b01d..76368a64 100644 --- a/precli/renderers/json.py +++ b/precli/renderers/json.py @@ -1,4 +1,4 @@ -# Copyright 2023 Secure Saurce LLC +# Copyright 2024 Secure Saurce LLC import json from rich import console @@ -19,13 +19,18 @@ def render(self, results: list[Result], metrics: Metrics): for result in results: rule = Rule.get_by_id(result.rule_id) + if result.location.url is not None: + file_name = result.location.url + else: + file_name = result.location.file_name + results_json["results"].append( { "rule_id": rule.id, "rule_name": rule.name, "cwe_id": rule.cwe.cwe_id, "severity": result.level.name, - "file_name": result.location.file_name, + "file_name": 
file_name, "start_line": result.location.start_line, "end_line": result.location.end_line, "start_column": result.location.start_column, diff --git a/precli/renderers/plain.py b/precli/renderers/plain.py index 46467724..70323a2b 100644 --- a/precli/renderers/plain.py +++ b/precli/renderers/plain.py @@ -1,4 +1,4 @@ -# Copyright 2023 Secure Saurce LLC +# Copyright 2024 Secure Saurce LLC import linecache from rich import console @@ -36,9 +36,15 @@ def render(self, results: list[Result], metrics: Metrics): self.console.print( f"{rule.id}: {rule.cwe.name}", ) + + if result.location.url is not None: + file_name = result.location.url + else: + file_name = result.location.file_name + # TODO(ericwb): replace hardcoded with actual scope self.console.print( - f' File "{result.location.file_name}", line ' + f' File "{file_name}", line ' f"{result.location.start_line}, in ", ) code_line = linecache.getline( diff --git a/requirements.txt b/requirements.txt index a13a22b0..9f59d607 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ rich # MIT tree_sitter>=0.20.4 tree-sitter-languages>=1.9.1 ignorelib +requests