Skip to content

Commit

Permalink
Support passing a file via standard input (#260)
Browse files Browse the repository at this point in the history
* If the argument "-" is passed, that tells precli to read the file from
stdin.
* Use the existing dependency of rich and Pygments to guess the lexer
since there is no file name in this case.
* This change also gets rid of the file extension logic of the parsers
and just keeps a lexer name instead.

Closes #214

Signed-off-by: Eric Brown <[email protected]>
  • Loading branch information
ericwb authored Feb 3, 2024
1 parent 2ee3722 commit 007a2ff
Show file tree
Hide file tree
Showing 13 changed files with 116 additions and 37 deletions.
14 changes: 10 additions & 4 deletions precli/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import io
import logging
import os
import pathlib
import sys
import tempfile
import traceback
Expand All @@ -13,7 +12,9 @@

import requests
from ignorelib import IgnoreFilterManager
from pygments import lexers
from rich import progress
from rich import syntax

import precli
from precli.core import loader
Expand Down Expand Up @@ -283,10 +284,15 @@ def parse_file(
) -> list[Result]:
try:
data = fdata.read()
file_extension = pathlib.Path(fname).suffix
if file_extension in parsers.keys():

lexer_name = syntax.Syntax.guess_lexer(fname, data)
if lexer_name == "default":
lexer = lexers.guess_lexer(data)
lexer_name = lexer.aliases[0] if lexer.aliases else lexer.name

if lexer_name in parsers.keys():
LOG.debug("working on file : %s", fname)
parser = parsers[file_extension]
parser = parsers[lexer_name]
return parser.parse(fname, data)
except KeyboardInterrupt:
sys.exit(2)
Expand Down
30 changes: 30 additions & 0 deletions precli/core/linecache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2024 Secure Saurce LLC
import linecache


class LineCache:
    """
    Look up individual source lines by 1-based line number.

    For a real file name the lookup is delegated to the standard
    ``linecache`` module. For the pseudo file name ``"<stdin>"`` there is
    nothing on disk to read, so the lines are taken from the contents
    handed to the constructor.
    """

    def __init__(self, file_name: str, file_contents: str):
        """
        Initialize the cache with the given file contents.

        :param file_name: Name of the file (can be "<stdin>").
        :param file_contents: A string containing the entire file data.
        """
        # Local import keeps this class self-contained without touching the
        # module's import block.
        import io

        self._file_name = file_name
        # Always defined so the attribute exists for both kinds of input;
        # it is only populated (and only consulted) for "<stdin>".
        self._lines: list[str] = []
        if self._file_name == "<stdin>":
            # str.splitlines() also breaks on form feed, vertical tab,
            # U+2028 and similar characters, which would shift every later
            # line number. newline="" splits only on \n, \r and \r\n and
            # returns the line endings untranslated.
            self._lines = io.StringIO(file_contents, newline="").readlines()

    def getline(self, lineno: int) -> str:
        """
        Return the line from the file contents at the given line number.

        :param lineno: The line number to fetch, 1-based.
        :return: The line at the specified line number, or an empty string if
            the line does not exist.
        """
        if self._file_name != "<stdin>":
            return linecache.getline(self._file_name, lineno)
        if 0 < lineno <= len(self._lines):
            return self._lines[lineno - 1]
        return ""
4 changes: 2 additions & 2 deletions precli/core/loader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 Secure Saurce LLC
# Copyright 2024 Secure Saurce LLC
from importlib.metadata import entry_points


Expand All @@ -8,6 +8,6 @@ def load_parsers(enabled: list[str], disabled: list[str]) -> dict:
discovered_plugins = entry_points(group="precli.parsers")
for plugin in discovered_plugins:
parser = plugin.load()(enabled, disabled)
parsers[parser.file_extension()] = parser
parsers[parser.lexer] = parser

return parsers
21 changes: 21 additions & 0 deletions precli/core/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def __init__(
end_line: int = -1,
start_column: int = 1,
end_column: int = -1,
snippet: str = None,
):
self._file_name = file_name
self._url = url
Expand All @@ -26,6 +27,7 @@ def __init__(
self._start_column = start_column
# TODO: default to end of line
self._end_column = end_column
self._snippet = snippet

@property
def file_name(self) -> str:
Expand Down Expand Up @@ -96,3 +98,22 @@ def end_column(self) -> int:
:rtype: int
"""
return self._end_column

@property
def snippet(self) -> str:
    """
    Snippet of context of the code.

    This is whatever was passed to the constructor or last assigned
    through the setter; it is None when no snippet has been set, since
    that is the constructor default.

    :return: snippet of context
    :rtype: str
    """
    return self._snippet

@snippet.setter
def snippet(self, snippet: str) -> None:
    """
    Set the code context snippet.

    The value is stored as given, without validation or copying.

    :param str snippet: context snippet
    """
    self._snippet = snippet
20 changes: 20 additions & 0 deletions precli/core/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,26 @@ def rule_id(self) -> str:
"""
return self._rule_id

@property
def source_language(self) -> str:
    """
    The source language.

    Derived from the first two characters of the rule ID. A prefix that
    is not listed below yields None.

    :return: language of the source code
    :rtype: str
    """
    languages_by_prefix = {
        "GO": "go",
        "JV": "java",
        "PY": "python",
        "RB": "ruby",
        "RS": "rust",
    }
    prefix = self._rule_id[:2]
    return languages_by_prefix.get(prefix)

@property
def location(self) -> Location:
"""
Expand Down
22 changes: 16 additions & 6 deletions precli/parsers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Copyright 2023 Secure Saurce LLC
# Copyright 2024 Secure Saurce LLC
from abc import ABC
from abc import abstractmethod
from importlib.metadata import entry_points

import tree_sitter_languages
from tree_sitter import Node

from precli.core.linecache import LineCache
from precli.core.location import Location
from precli.core.result import Result
from precli.core.suppression import Suppression
Expand All @@ -30,6 +30,7 @@ def __init__(self, lang: str, enabled: list = None, disabled: list = None):
:param list enabled: list of rules to enable
:param list disabled: list of rules to disable
"""
self._lexer = lang
self.tree_sitter_language = tree_sitter_languages.get_language(lang)
self.tree_sitter_parser = tree_sitter_languages.get_parser(lang)
self.rules = {}
Expand All @@ -56,14 +57,15 @@ def __init__(self, lang: str, enabled: list = None, disabled: list = None):
if self.rules[rule.name].wildcards:
self.wildcards |= self.rules[rule.name].wildcards

@abstractmethod
def file_extension(self) -> str:
@property
def lexer(self) -> str:
"""
File extension of files this parser can handle.
The name of the lexer
:return: file extension glob such as "*.py"
:return: lexer name
:rtype: str
"""
return self._lexer

def parse(self, file_name: str, data: bytes = None) -> list[Result]:
"""
Expand All @@ -83,7 +85,15 @@ def parse(self, file_name: str, data: bytes = None) -> list[Result]:
tree = self.tree_sitter_parser.parse(data)
self.visit([tree.root_node])

linecache = LineCache(file_name, data.decode())

for result in self.results:
start = result.location.start_line - 1
stop = result.location.end_line + 2
result.location.snippet = ""
for i in range(start, stop):
result.location.snippet += linecache.getline(i)

suppression = self.suppressions.get(result.location.start_line)
if suppression and result.rule_id in suppression.rules:
result.suppression = suppression
Expand Down
5 changes: 1 addition & 4 deletions precli/parsers/go.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 Secure Saurce LLC
# Copyright 2024 Secure Saurce LLC
import ast
import re

Expand All @@ -18,9 +18,6 @@ def __init__(self, enabled: list = None, disabled: list = None):
)
self.SUPPRESSED_RULES = re.compile(r"(?:(GO\d\d\d|[a-z_]+),?)+")

def file_extension(self) -> str:
return ".go"

def visit_source_file(self, nodes: list[Node]):
self.suppressions = {}
self.current_symtab = SymbolTable("<source_file>")
Expand Down
5 changes: 1 addition & 4 deletions precli/parsers/java.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# Copyright 2023 Secure Saurce LLC
# Copyright 2024 Secure Saurce LLC
from precli.parsers import Parser


class Java(Parser):
def __init__(self, enabled: list = None, disabled: list = None):
super().__init__("java", enabled, disabled)

def file_extension(self) -> str:
return ".java"
3 changes: 0 additions & 3 deletions precli/parsers/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ def __init__(self, enabled: list = None, disabled: list = None):
self.SUPPRESS_COMMENT = re.compile(r"# suppress:? (?P<rules>[^#]+)?#?")
self.SUPPRESSED_RULES = re.compile(r"(?:(PY\d\d\d|[a-z_]+),?)+")

def file_extension(self) -> str:
return ".py"

def visit_module(self, nodes: list[Node]):
self.suppressions = {}
self.current_symtab = SymbolTable("<module>")
Expand Down
16 changes: 10 additions & 6 deletions precli/renderers/detailed.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def render(self, results: list[Result], metrics: Metrics):
if result.location.url is not None:
file_name = result.location.url
else:
result.location.file_name
file_name = result.location.file_name

self.console.print(
f"{emoji} {result.level.name.title()} on line "
Expand All @@ -56,12 +56,16 @@ def render(self, results: list[Result], metrics: Metrics):
f"{result.message}",
style=style,
)
code = syntax.Syntax.from_path(
result.location.file_name,

line_offset = result.location.start_line - 2
code = syntax.Syntax(
result.location.snippet,
result.source_language,
line_numbers=True,
start_line=line_offset + 1,
line_range=(
result.location.start_line - 1,
result.location.end_line + 1,
result.location.start_line - line_offset - 1,
result.location.end_line - line_offset + 1,
),
highlight_lines=(
result.location.start_line,
Expand Down Expand Up @@ -123,7 +127,7 @@ def render(self, results: list[Result], metrics: Metrics):

code = syntax.Syntax(
code,
"python",
result.source_language,
line_numbers=True,
line_range=(start_line - before, end_line + after),
highlight_lines=highlight_lines,
Expand Down
2 changes: 1 addition & 1 deletion precli/renderers/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def render(self, results: list[Result], metrics: Metrics):
if result.location.url is not None:
file_name = result.location.url
else:
result.location.file_name
file_name = result.location.file_name

results_json["results"].append(
{
Expand Down
10 changes: 3 additions & 7 deletions precli/renderers/plain.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# Copyright 2024 Secure Saurce LLC
import linecache

from rich import console
from rich.padding import Padding

Expand Down Expand Up @@ -40,17 +38,15 @@ def render(self, results: list[Result], metrics: Metrics):
if result.location.url is not None:
file_name = result.location.url
else:
result.location.file_name
file_name = result.location.file_name

# TODO(ericwb): replace hardcoded <module> with actual scope
self.console.print(
f' File "{file_name}", line '
f"{result.location.start_line}, in <module>",
)
code_line = linecache.getline(
filename=result.location.file_name,
lineno=result.location.start_line,
)
code_lines = result.location.snippet.splitlines(keepends=True)
code_line = code_lines[1] if len(code_lines) > 1 else code_lines[0]
underline_width = (
result.location.end_column - result.location.start_column
)
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
cwe
Pygments
rich # MIT
tree_sitter>=0.20.4
tree-sitter-languages>=1.9.1
Expand Down

0 comments on commit 007a2ff

Please sign in to comment.