Skip to content

Commit

Permalink
Merge pull request #1 from jasonsewall-intel/external-update-new
Browse files Browse the repository at this point in the history
Better handle line cont. & other usability fixes
  • Loading branch information
Jason Sewall authored Oct 5, 2020
2 parents 1673301 + a046504 commit e1f48f2
Show file tree
Hide file tree
Showing 13 changed files with 922 additions and 134 deletions.
28 changes: 19 additions & 9 deletions codebasin.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
optional arguments:
-h, --help show this help message and exit
-c FILE, --config FILE
configuration file (default: config.yaml)
configuration file (default: <DIR>/config.yaml)
-v, --verbose verbosity level
-q, --quiet quiet level
-r DIR, --rootdir DIR
Expand All @@ -25,7 +25,7 @@

from codebasin import config, finder, report, util, walkers

version = 1.0
version = 1.05


def report_enabled(name):
Expand Down Expand Up @@ -60,18 +60,20 @@ def guess_project_name(config_path):

# Read command-line arguments
parser = argparse.ArgumentParser(description="Code Base Investigator v" + str(version))
parser.add_argument('-r', '--rootdir', dest="rootdir", metavar='DIR',
default=os.getcwd(), type=str,
help="Set working root directory (default .)")
parser.add_argument('-c', '--config', dest='config_file', metavar='FILE', action='store',
default='config.yaml', help='configuration file (default: config.yaml)')
help='configuration file (default: <DIR>/config.yaml)')
parser.add_argument('-v', '--verbose', dest='verbose',
action='count', default=0, help='increase verbosity level')
parser.add_argument('-q', '--quiet', dest='quiet',
action='count', default=0, help='decrease verbosity level')
parser.add_argument('-r', '--rootdir', dest="rootdir", metavar='DIR',
default=os.getcwd(), type=str,
help="Set working root directory (default .)")
parser.add_argument('-R', '--report', dest='reports', metavar='REPORT', default=['all'],
choices=['all', 'summary', 'clustering'], nargs='+',
help='desired output reports (default: all)')
parser.add_argument('--batchmode', dest='batchmode', action='store_true', default=False,
help="Set batch mode (additional output for bulk operation.)")
args = parser.parse_args()

stdout_log = logging.StreamHandler(sys.stdout)
Expand All @@ -81,12 +83,16 @@ def guess_project_name(config_path):
max(1, logging.WARNING - 10 * (args.verbose - args.quiet)))
rootdir = os.path.realpath(args.rootdir)

if args.config_file is None:
config_file = os.path.join(rootdir, "config.yaml")
else:
config_file = args.config_file
# Load the configuration file into a dict
if not util.ensure_yaml(args.config_file):
if not util.ensure_yaml(config_file):
logging.getLogger("codebasin").error(
"Configuration file does not have YAML file extension.")
sys.exit(1)
codebase, configuration = config.load(args.config_file, rootdir)
codebase, configuration = config.load(config_file, rootdir)

# Parse the source tree, and determine source line associations.
# The trees and associations are housed in state.
Expand All @@ -96,7 +102,11 @@ def guess_project_name(config_path):
platform_mapper = walkers.PlatformMapper(codebase)
setmap = platform_mapper.walk(state)

output_prefix = os.path.realpath(guess_project_name(args.config_file))
output_prefix = os.path.realpath(guess_project_name(config_file))

if args.batchmode and (report_enabled("summary") or report_enabled("clustering")):
print(f"Config file: {config_file}")
print(f"Root: {rootdir}")

# Print summary report
if report_enabled("summary"):
Expand Down
4 changes: 2 additions & 2 deletions codebasin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def load_database(dbpath, rootdir):
Return a list of compilation commands, where each command is
represented as a compilation database entry.
"""
with open(dbpath, 'r') as fi:
with util.safe_open_read_nofollow(dbpath, 'r') as fi:
db = yaml.safe_load(fi)

configuration = []
Expand Down Expand Up @@ -244,7 +244,7 @@ def load(config_file, rootdir):
Return a (codebase, platform configuration) tuple of dicts.
"""
if os.path.isfile(config_file):
with open(config_file, 'r') as f:
with util.safe_open_read_nofollow(config_file, 'r') as f:
config = yaml.safe_load(f)
else:
raise RuntimeError("Could not open {!s}.".format(config_file))
Expand Down
165 changes: 47 additions & 118 deletions codebasin/file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
and building a tree of nodes from it.
"""

from os.path import splitext

import os
from codebasin.file_source import get_file_source
from . import preprocessor # pylint : disable=no-name-in-module
from . import util # pylint : disable=no-name-in-module


class LineGroup:
Expand All @@ -30,25 +31,19 @@ def empty(self):
return False
return True

def add_line(self, line_num, is_countable=False):
def add_line(self, phys_int, sloc_count):
"""
Add a line to this line group. Update the extent appropriately,
and add any countable lines it contributes to the line count.
"""

if self.start_line == -1:
self.start_line = line_num

self.end_line = line_num

if self.start_line == -1 or line_num < self.start_line:
self.start_line = line_num
if self.start_line == -1 or phys_int[0] < self.start_line:
self.start_line = phys_int[0]

if line_num > self.end_line:
self.end_line = line_num
if phys_int[1] - 1 > self.end_line:
self.end_line = phys_int[1] - 1

if is_countable:
self.line_count += 1
self.line_count += sloc_count

def reset(self):
"""
Expand All @@ -58,13 +53,12 @@ def reset(self):
self.start_line = -1
self.end_line = -1

def merge(self, line_group, count=False):
def merge(self, line_group):
"""
Merge another line group into this line group, and reset the
other group.
"""
if count:
self.line_count += line_group.line_count
self.line_count += line_group.line_count

if self.start_line == -1:
self.start_line = line_group.start_line
Expand All @@ -84,63 +78,25 @@ class FileParser:
"""

def __init__(self, _filename):
self._filename = _filename
self.full_line = ''

split = splitext(_filename)
if len(split) == 2:
self._file_extension = split[1].lower()
else:
self._file_extension = None
self._filename = os.path.realpath(_filename)

@staticmethod
def line_info(line):
"""
Determine if the input line is a directive by looking for a '#'
as the first non-whitespace character. Also determine if the
last character before a newline is a line continuation
character '\'.
Return a (directive, line_continue) tuple.
"""

directive = False
line_continue = False

for c in line:
if c == '#':
directive = True
break
elif c not in [' ', '\t']:
break

if line.rstrip("\n\r")[-1:] == '\\':
line_continue = True

return (directive, line_continue)

def handle_directive(self, out_tree, line_num, comment_cleaner, groups):
def handle_directive(out_tree, groups, logical_line):
"""
Handle inserting code and directive nodes, where appropriate.
Update the file group, and reset the code and directive groups.
"""
# We will actually use this directive, if it is not empty
self.full_line = comment_cleaner.strip_comments(self.full_line)
if self.full_line.strip():
# We need to finalize the previously started
# CodeNode (if there was one) before processing
# this DirectiveNode
if not groups['code'].empty():
groups['code'].add_line(line_num - 1)
self.insert_code_node(out_tree, groups['code'])

groups['file'].merge(groups['code'])
# We need to finalize the previously started
# CodeNode (if there was one) before processing
# this DirectiveNode
if not groups['code'].empty():
FileParser.insert_code_node(out_tree, groups['code'])
groups['file'].merge(groups['code'])

self.insert_directive_node(out_tree, groups['directive'])
FileParser.insert_directive_node(out_tree, groups['directive'], logical_line)

groups['file'].merge(groups['directive'])
else:
groups['code'].merge(groups['directive'])
groups['file'].merge(groups['directive'])

@staticmethod
def insert_code_node(tree, line_group):
Expand All @@ -151,13 +107,14 @@ def insert_code_node(tree, line_group):
line_group.start_line, line_group.end_line, line_group.line_count)
tree.insert(new_node)

def insert_directive_node(self, tree, line_group):
@staticmethod
def insert_directive_node(tree, line_group, logical_line):
"""
Build a directive node by parsing a directive line, and insert a
new directive node into the tree.
"""
new_node = preprocessor.DirectiveParser(preprocessor.Lexer(
self.full_line, line_group.start_line).tokenize()).parse()
logical_line, line_group.start_line).tokenize()).parse()
new_node.start_line = line_group.start_line
new_node.end_line = line_group.end_line
new_node.num_lines = line_group.line_count
Expand All @@ -169,73 +126,45 @@ def parse_file(self):
representing this file, and return it.
"""

file_comment_cleaner = preprocessor.CommentCleaner(self._file_extension)
if file_comment_cleaner.filetype == 'c':
cpp_comment_cleaner = file_comment_cleaner
else:
cpp_comment_cleaner = preprocessor.CommentCleaner('.c')

out_tree = preprocessor.SourceTree(self._filename)
with open(self._filename, mode='r', errors='replace') as source_file:
previous_continue = False
file_source = get_file_source(self._filename)
if not file_source:
raise RuntimeError(f"{self._filename} doesn't appear " +
"to be a language this tool can process")
with util.safe_open_read_nofollow(self._filename, mode='r', errors='replace') as source_file:

groups = {'code': LineGroup(),
'directive': LineGroup(),
'file': LineGroup()
}
'file': LineGroup()}

groups['file'].start_line = 1

lines = source_file.readlines()
for (line_num, line) in enumerate(lines, 1):
# Determine if this line starts with a # (directive)
# and/or ends with a \ (line continuation)
(in_directive, continue_line) = self.line_info(line)

# Only follow continuation for directives
if previous_continue or in_directive:
source = file_source(source_file)
try:
while True:
logical_line = next(source)
phys_int = logical_line.phys_interval()
# Only follow continuation for directives
if logical_line.category == 'CPP_DIRECTIVE':
# Add this into the directive lines, even if it
# might not be a directive we count

# Add this into the directive lines, even if it
# might not be a directive we count
groups['directive'].add_line(line_num, True)
groups['directive'].add_line(phys_int, logical_line.local_sloc)

# If this line starts a new directive, flush the
# line buffer
if in_directive and not previous_continue:
self.full_line = ''
FileParser.handle_directive(out_tree, groups, logical_line.flushed_line)

previous_continue = continue_line

# If this line also contains a continuation
# character
if continue_line:
self.full_line += line.rstrip("\\\n\r")
# If this line ends a previously continued line
else:
self.full_line += line.rstrip("\n\r")

self.handle_directive(out_tree, line_num, cpp_comment_cleaner,
groups)

# FallBack is that this line is a simple code line.
else:
previous_continue = False

# If the line isn't empty after stripping comments,
# count it as code
if file_comment_cleaner.strip_comments(line[0:-1]).strip():
groups['code'].add_line(line_num, True)
# FallBack is that this line is a simple code line.
else:
groups['code'].add_line(line_num)
groups['code'].add_line(phys_int, logical_line.local_sloc)
except StopIteration as it:
# pylint: disable=unpacking-non-sequence
_, physical_loc = it.value

# Insert any code lines left at the end of the file
if not groups['code'].empty():
groups['code'].add_line(len(lines))
groups['code'].add_line((groups['code'].start_line, physical_loc - 1), 0)
self.insert_code_node(out_tree, groups['code'])

groups['file'].merge(groups['code'])

groups['file'].add_line(len(lines))
out_tree.root.num_lines = groups['file'].end_line
out_tree.root.total_sloc = groups['file'].line_count
return out_tree
Loading

0 comments on commit e1f48f2

Please sign in to comment.