Skip to content

Commit

Permalink
Replace FileLanguage class with a Language enum
Browse files Browse the repository at this point in the history
- Avoids referencing languages by name (as strings).
- Enables multiple constructors (from_path and from_extension).
- Enables a dedicated sentinel value (UNKNOWN) instead of None.

Signed-off-by: John Pennycook <[email protected]>
  • Loading branch information
Pennycook committed Jan 29, 2024
1 parent 71cef77 commit 9549265
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 14 deletions.
3 changes: 2 additions & 1 deletion codebasin/file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from codebasin import preprocessor, util
from codebasin.file_source import get_file_source
from codebasin.source import Language

log = logging.getLogger("codebasin")

Expand Down Expand Up @@ -156,7 +157,7 @@ def insert_directive_node(tree, line_group, logical_line):

tree.insert(new_node)

def parse_file(self, *, summarize_only=True, language=None):
def parse_file(self, *, summarize_only=True, language=Language.UNKNOWN):
"""
Parse the file that this parser points at, build a SourceTree
representing this file, and return it.
Expand Down
14 changes: 7 additions & 7 deletions codebasin/file_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import itertools as it
import logging

from codebasin.language import FileLanguage
from codebasin.source import Language

log = logging.getLogger("codebasin")

Expand Down Expand Up @@ -698,20 +698,20 @@ def asm_file_source(fp, relaxed=False):
return (total_sloc, total_physical_lines)


def get_file_source(path, assumed_lang=None):
def get_file_source(path, assumed_lang=Language.UNKNOWN):
"""
Return a C or Fortran line source for path depending on
the language we can detect, or fail.
"""
lang = FileLanguage(path).get_language()
if assumed_lang:
lang = Language.from_path(path)
if assumed_lang != Language.UNKNOWN:
lang = assumed_lang

if lang == "fortran-free":
if lang == Language.FORTRAN_FREE:
return fortran_file_source
elif lang in ["c", "c++"]:
elif lang in [Language.C, Language.CPLUSPLUS]:
return c_file_source
elif lang in ["asm"]:
elif lang in [Language.ASM]:
return asm_file_source
else:
raise RuntimeError(f"Could not determine language of {path}.")
10 changes: 5 additions & 5 deletions codebasin/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import os

from codebasin import file_parser, platform, preprocessor, util
from codebasin.language import FileLanguage
from codebasin.source import Language
from codebasin.walkers.tree_associator import TreeAssociator

log = logging.getLogger("codebasin")
Expand Down Expand Up @@ -84,7 +84,7 @@ def _map_filename(self, fn):
self.fileinfo[bn].append(FileInfo(fn, size, sha))
return fn

def insert_file(self, fn, language=None):
def insert_file(self, fn, language=Language.UNKNOWN):
"""
Build a new tree for a source file, and create an association
map for it.
Expand All @@ -97,10 +97,10 @@ def insert_file(self, fn, language=None):
language=language,
)
self.maps[fn] = collections.defaultdict(set)
if language:
self.langs[fn] = language
if language == Language.UNKNOWN:
self.langs[fn] = Language.from_path(fn)
else:
self.langs[fn] = FileLanguage(fn).get_language()
self.langs[fn] = language

def get_filenames(self):
"""
Expand Down
2 changes: 2 additions & 0 deletions codebasin/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import logging
import os
import warnings

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -60,6 +61,7 @@ def __init__(self, filename):
if self._extension in self._language_extensions[lang]:
self._language = lang
break
warnings.warn("FileLanguage is deprecated. Use Language instead.")

def get_language(self):
return self._language
55 changes: 55 additions & 0 deletions codebasin/source/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright (C) 2019-2024 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause

import os
from enum import Enum, auto


class Language(Enum):
    """
    Enumeration of the source languages recognized by file extension.

    UNKNOWN is a dedicated sentinel returned when an extension does not
    match any of the recognized languages.
    """

    ASM = auto()
    C = auto()
    CPLUSPLUS = auto()
    FORTRAN_FREE = auto()
    FORTRAN_FIXED = auto()
    UNKNOWN = auto()

    # Alias kept so that existing references to the previous (misspelled)
    # member name keep working. from_extension() has always referred to
    # FORTRAN_FIXED, which did not exist and raised AttributeError.
    FORTRAN_MIXED = FORTRAN_FIXED

    @classmethod
    def from_extension(cls, ext: str) -> "Language":
        """
        Return the Language associated with a file extension.

        Parameters
        ----------
        ext: str
            The extension, including the leading dot (e.g. ".cpp").
            Matching is case-sensitive.

        Returns
        -------
        Language
            The matching language, or Language.UNKNOWN if the extension
            is not recognized.
        """
        if ext in (".s", ".S", ".asm"):
            return cls.ASM

        if ext in (".c", ".h"):
            return cls.C

        if ext in (
            ".c++",
            ".cxx",
            ".cpp",
            ".cc",
            ".hpp",
            ".hxx",
            ".h++",
            ".hh",
            ".inc",
            ".inl",
            ".tcc",
            ".icc",
            ".ipp",
            ".cu",
            ".cuh",
            ".cl",
        ):
            return cls.CPLUSPLUS

        if ext in (".f90", ".F90"):
            return cls.FORTRAN_FREE

        if ext in (".f", ".ftn", ".fpp", ".F", ".FOR", ".FTN", ".FPP"):
            return cls.FORTRAN_FIXED

        return cls.UNKNOWN

    @classmethod
    def from_path(cls, path: str) -> "Language":
        """
        Return the Language associated with the extension of a path.

        Parameters
        ----------
        path: str
            A file path; only the final extension (as split by
            os.path.splitext) is considered.

        Returns
        -------
        Language
            The matching language, or Language.UNKNOWN if the path has no
            extension or the extension is not recognized.
        """
        ext = os.path.splitext(path)[1]
        return cls.from_extension(ext)
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@
author="John Pennycook",
author_email="[email protected]",
url="https://www.github.com/intel/code-base-investigator",
packages=["codebasin", "codebasin.schema", "codebasin.walkers"],
packages=[
"codebasin",
"codebasin.source",
"codebasin.schema",
"codebasin.walkers",
],
include_package_data=True,
scripts=["codebasin.py"],
classifiers=[
Expand Down

0 comments on commit 9549265

Please sign in to comment.