def get_file_source(path, assumed_lang=Language.UNKNOWN):
    """
    Return a line source function for path, chosen from the language
    we can detect (or the language the caller assumes), or fail.

    Parameters
    ----------
    path:
        Path of the file; its extension is used to detect the language
        when no language is assumed.
    assumed_lang:
        Language to use instead of detecting one from path. Both
        Language.UNKNOWN and None mean "no assumption".

    Returns
    -------
    fortran_file_source for free-form Fortran, c_file_source for C and
    C++, or asm_file_source for assembly.

    Raises
    ------
    RuntimeError
        If the resulting language has no matching line source; this
        includes Language.UNKNOWN and fixed-form Fortran.
    """
    # None was the default before Language.UNKNOWN became the sentinel, so
    # keep accepting it rather than failing with "could not determine".
    if assumed_lang is None or assumed_lang == Language.UNKNOWN:
        lang = Language.from_path(path)
    else:
        lang = assumed_lang

    if lang == Language.FORTRAN_FREE:
        return fortran_file_source
    if lang in (Language.C, Language.CPLUSPLUS):
        return c_file_source
    if lang == Language.ASM:
        return asm_file_source

    raise RuntimeError(f"Could not determine language of {path}.")
class Language(Enum):
    """
    Source languages that can be identified from a file extension.

    Member values are generated with auto(); compare members by identity
    or equality rather than relying on the underlying integers.
    """

    ASM = auto()
    C = auto()
    CPLUSPLUS = auto()
    FORTRAN_FREE = auto()
    FORTRAN_FIXED = auto()
    # from_extension() has always referred to FORTRAN_FIXED, but the member
    # was originally declared as FORTRAN_MIXED, so every fixed-form Fortran
    # extension raised AttributeError. Keep the old spelling as an alias so
    # any code written against it still resolves to the same member.
    FORTRAN_MIXED = FORTRAN_FIXED
    UNKNOWN = auto()

    @classmethod
    def from_extension(cls, ext: str) -> "Language":
        """
        Return the Language associated with a file extension.

        ext must include the leading dot (e.g. ".cpp"). Matching is
        case-sensitive. Unrecognized extensions map to UNKNOWN.
        """
        if ext in (".s", ".S", ".asm"):
            return cls.ASM

        if ext in (".c", ".h"):
            return cls.C

        if ext in (
            ".c++",
            ".cxx",
            ".cpp",
            ".cc",
            ".hpp",
            ".hxx",
            ".h++",
            ".hh",
            ".inc",
            ".inl",
            ".tcc",
            ".icc",
            ".ipp",
            ".cu",
            ".cuh",
            ".cl",
        ):
            return cls.CPLUSPLUS

        if ext in (".f90", ".F90"):
            return cls.FORTRAN_FREE

        if ext in (".f", ".ftn", ".fpp", ".F", ".FOR", ".FTN", ".FPP"):
            return cls.FORTRAN_FIXED

        return cls.UNKNOWN

    @classmethod
    def from_path(cls, path: str) -> "Language":
        """
        Return the Language associated with the extension of path.

        The extension is whatever os.path.splitext() reports, so a path
        without an extension maps to UNKNOWN.
        """
        ext = os.path.splitext(path)[1]
        return cls.from_extension(ext)