Skip to content

Commit

Permalink
Merge pull request #122 from Pennycook/optimize-paths
Browse files Browse the repository at this point in the history
Optimize path handling
  • Loading branch information
Pennycook authored Oct 24, 2024
2 parents 20d11b4 + ca5dc31 commit 4cb95e2
Show file tree
Hide file tree
Showing 10 changed files with 65 additions and 24 deletions.
2 changes: 1 addition & 1 deletion codebasin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def from_file(cls, filename: str | os.PathLike[str]):
-------
A CompilationDatabase corresponding to the provided JSON file.
"""
with codebasin.util.safe_open_read_nofollow(filename, "r") as f:
with open(filename) as f:
db = codebasin.util._load_json(f, schema_name="compiledb")
return CompilationDatabase.from_json(db)

Expand Down
6 changes: 3 additions & 3 deletions codebasin/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,19 +271,19 @@ def main():
args.reports = ["all"]

# Determine the root directory based on where codebasin is run.
rootdir = os.path.realpath(os.getcwd())
rootdir = os.path.abspath(os.getcwd())

# Set up a default configuration object.
configuration = {}

# Load the analysis file if it exists.
if args.analysis_file is not None:
path = os.path.realpath(args.analysis_file)
path = os.path.abspath(args.analysis_file)
if os.path.exists(path):
if not os.path.splitext(path)[1] == ".toml":
raise RuntimeError(f"Analysis file {path} must end in .toml.")

with util.safe_open_read_nofollow(path, "rb") as f:
with open(path, "rb") as f:
try:
analysis_toml = util._load_toml(f, "analysis")
except BaseException:
Expand Down
10 changes: 5 additions & 5 deletions codebasin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def load_importcfg():
path = ".cbi/config"
if os.path.exists(path):
log.info(f"Found configuration file at {path}")
with util.safe_open_read_nofollow(path, "rb") as f:
with open(path, "rb") as f:
try:
_importcfg_toml = util._load_toml(f, "cbiconfig")
for name, compiler in _importcfg_toml["compiler"].items():
Expand Down Expand Up @@ -324,7 +324,7 @@ def load_database(dbpath, rootdir):

# Include paths may be specified relative to root
include_paths = [
os.path.realpath(os.path.join(rootdir, f)) for f in include_paths
os.path.abspath(os.path.join(rootdir, f)) for f in include_paths
]

# Files may be specified:
Expand All @@ -336,15 +336,15 @@ def load_database(dbpath, rootdir):
if os.path.isabs(command.directory):
filedir = command.directory
else:
filedir = os.path.realpath(
filedir = os.path.abspath(
rootdir,
os.path.join(command.directory),
)

if os.path.isabs(command.filename):
path = os.path.realpath(command.filename)
path = os.path.abspath(command.filename)
else:
path = os.path.realpath(os.path.join(filedir, command.filename))
path = os.path.abspath(os.path.join(filedir, command.filename))

# Compilation database may contain files that don't
# exist without running make
Expand Down
10 changes: 3 additions & 7 deletions codebasin/file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import logging
import os

from codebasin import preprocessor, util
from codebasin import preprocessor
from codebasin.file_source import get_file_source

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -91,7 +91,7 @@ class FileParser:
"""

def __init__(self, _filename):
self._filename = os.path.realpath(_filename)
self._filename = os.path.abspath(_filename)

@staticmethod
def handle_directive(out_tree, groups, logical_line):
Expand Down Expand Up @@ -170,11 +170,7 @@ def parse_file(self, *, summarize_only=True, language=None):
f"{filename} doesn't appear "
+ "to be a language this tool can process",
)
with util.safe_open_read_nofollow(
filename,
mode="r",
errors="replace",
) as source_file:
with open(filename, errors="replace") as source_file:
groups = {
"code": LineGroup(),
"directive": LineGroup(),
Expand Down
16 changes: 16 additions & 0 deletions codebasin/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,26 @@ def __init__(self, summarize_only):
self.maps = {}
self.langs = {}
self.summarize_only = summarize_only
self._path_cache = {}

def _get_realpath(self, path: str) -> str:
    """
    Resolve a path to its canonical form, memoizing the result.

    Results are stored in self._path_cache, keyed by the path exactly
    as it was passed in, so each distinct path string is resolved with
    os.path.realpath at most once per instance.

    Parameters
    ----------
    path: str
        The path to resolve.

    Returns
    -------
    str
        Equivalent to os.path.realpath(path).
    """
    cache = self._path_cache

    # Fast path: this exact string has been resolved before.
    if path in cache:
        return cache[path]

    resolved = os.path.realpath(path)
    cache[path] = resolved
    return resolved

def insert_file(self, fn, language=None):
"""
Build a new tree for a source file, and create an association
map for it.
"""
fn = self._get_realpath(fn)
if fn not in self.trees:
parser = file_parser.FileParser(fn)
self.trees[fn] = parser.parse_file(
Expand All @@ -60,6 +74,7 @@ def get_tree(self, fn):
"""
Return the SourceTree associated with a filename
"""
fn = self._get_realpath(fn)
if fn not in self.trees:
return None
return self.trees[fn]
Expand All @@ -68,6 +83,7 @@ def get_map(self, fn):
"""
Return the NodeAssociationMap associated with a filename
"""
fn = self._get_realpath(fn)
if fn not in self.maps:
return None
return self.maps[fn]
Expand Down
2 changes: 1 addition & 1 deletion codebasin/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def find_include_file(self, filename, this_path, is_system_include=False):

# Determine the path to the include file, if it exists
for path in local_paths + self._include_paths:
test_path = os.path.realpath(os.path.join(path, filename))
test_path = os.path.abspath(os.path.join(path, filename))
if os.path.isfile(test_path):
include_file = test_path
self.found_incl[filename] = include_file
Expand Down
2 changes: 1 addition & 1 deletion codebasin/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ def __init__(self, _filename):
def __compute_file_hash(self):
chunk_size = 4096
hasher = hashlib.sha512()
with util.safe_open_read_nofollow(self.filename, "rb") as in_file:
with open(self.filename, "rb") as in_file:
for chunk in iter(lambda: in_file.read(chunk_size), b""):
hasher.update(chunk)

Expand Down
6 changes: 0 additions & 6 deletions codebasin/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,6 @@ def safe_open_write_binary(fname):
return os.fdopen(fpid, "wb")


def safe_open_read_nofollow(fname, *args, **kwargs):
    """
    Open fname for reading, but don't follow links.

    Parameters
    ----------
    fname: str | os.PathLike[str]
        Path of the file to open.

    *args, **kwargs
        Forwarded to the built-in open() after the file name
        (e.g. mode, buffering, encoding, errors).

    Returns
    -------
    A file object wrapping a read-only descriptor for fname.

    Raises
    ------
    OSError
        If fname cannot be opened, including when its final component
        is a symbolic link (the descriptor is opened with O_NOFOLLOW).
    """

    def _opener(path, _flags):
        # Ignore the flags open() derives from the mode string: the
        # descriptor is always read-only and never follows a symlink.
        return os.open(path, os.O_RDONLY | os.O_NOFOLLOW)

    # Going through open()'s opener hook (rather than os.open followed by
    # os.fdopen) lets the file object own the descriptor from the start,
    # so it is closed if wrapping fails (e.g. fname is a directory) and
    # is never created at all if the mode string is rejected.
    return open(fname, *args, opener=_opener, **kwargs)


def valid_path(path):
"""Return true if the path passed in is valid"""
valid = True
Expand Down
1 change: 1 addition & 0 deletions tests/duplicates/cpu2
34 changes: 34 additions & 0 deletions tests/duplicates/test_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,40 @@ def test_duplicates(self):
setmap = mapper.walk(state)
self.assertDictEqual(setmap, expected_setmap, "Mismatch in setmap")

def test_symlinks(self):
    """
    Check that symlinks do not count towards divergence.

    Two platforms, "cpu" and "cpu2", each list foo.cpp under a
    different directory name. The expected setmap contains a single
    entry attributed to both platforms.
    """
    # NOTE(review): tests/duplicates/cpu2 is presumably a symlink to
    # the cpu directory -- confirm against the repository.
    platform_files = {
        "cpu": str(self.rootdir / "cpu/foo.cpp"),
        "cpu2": str(self.rootdir / "cpu2/foo.cpp"),
    }

    codebase = CodeBase(self.rootdir, exclude_patterns=["gpu/"])

    # One compilation entry per platform, with no defines or includes.
    configuration = {
        platform: [
            {
                "file": source,
                "defines": [],
                "include_paths": [],
                "include_files": [],
            },
        ]
        for platform, source in platform_files.items()
    }

    state = finder.find(self.rootdir, codebase, configuration)
    setmap = PlatformMapper(codebase).walk(state)

    expected_setmap = {frozenset(["cpu", "cpu2"]): 1}
    self.assertDictEqual(setmap, expected_setmap, "Mismatch in setmap")


if __name__ == "__main__":
unittest.main()

0 comments on commit 4cb95e2

Please sign in to comment.