Skip to content

Commit

Permalink
Add option to adjust platforms from command line
Browse files Browse the repository at this point in the history
The set of platforms in an analysis can now be specified using:

  --platform <platform>

When combined with a configuration file, this new option can be used to limit
analysis to a subset of platforms, as below:

  -c config.yaml --platform CPU

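When the platform specification is a path to an existing .json file, it is
interpreted as a compilation database defining a new platform, which is named
after the file (e.g. cpu.json defines a platform called "cpu"):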

  --platform /path/to/cpu.json --platform /path/to/gpu.json

Both usages are intended to improve user productivity by minimizing the amount
of time spent editing configuration files when making minor adjustments to an
existing analysis.

Signed-off-by: John Pennycook <[email protected]>
  • Loading branch information
Pennycook committed Feb 21, 2024
1 parent 8b8105b commit ffb87d9
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 21 deletions.
98 changes: 83 additions & 15 deletions codebasin.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,19 @@ def main():
help="Exclude files matching this pattern from the code base. "
+ "May be specified multiple times.",
)
parser.add_argument(
"-p",
"--platform",
dest="platforms",
metavar="<platform>",
action="append",
default=[],
help="Add the specified platform to the analysis. "
+ "May be a name or a path to a compilation database. "
+ "May be specified multiple times. "
+ "If not specified, all known platforms will be included.",
)

args = parser.parse_args()

stdout_log = logging.StreamHandler(sys.stdout)
Expand All @@ -120,25 +133,76 @@ def main():
)
rootdir = os.path.realpath(args.rootdir)

if args.config_file is None:
# Process the -p flag first to infer wider context.
filtered_platforms = []
additional_platforms = []
for p in args.platforms:
# If it's a path, it has to be a compilation database.
if os.path.exists(p):
if not os.path.splitext(p)[1] == ".json":
raise RuntimeError(f"Platform file {p} must end in .json.")
additional_platforms.append(p)
continue

# Otherwise, treat it as a name in the configuration file.
if not isinstance(p, str):
raise RuntimeError(f"Platform name {p} must be a string.")

# Explain the logic above in cases that look suspiciously like paths.
if "/" in p or os.path.splitext(p)[1] == ".json":
logging.getLogger("codebasin").warning(
f"{p} doesn't exist, so will be treated as a name.",
)
filtered_platforms.append(p)

# If no additional platforms are specified, a config file is required.
config_file = args.config_file
if len(additional_platforms) == 0 and config_file is None:
config_file = os.path.join(rootdir, "config.yaml")
else:
config_file = args.config_file
# Load the configuration file into a dict
if not util.ensure_yaml(config_file):
logging.getLogger("codebasin").error(
"Configuration file does not have YAML file extension.",
if not os.path.exists(config_file):
raise RuntimeError(f"Could not find config file {config_file}")

# Set up a default codebase and configuration object.
codebase = {
"files": [],
"platforms": [],
"exclude_files": set(),
"exclude_patterns": args.excludes,
"rootdir": rootdir,
}
configuration = {}

# Load the configuration file if it exists, obeying any platform filter.
if config_file is not None:
if not util.ensure_yaml(config_file):
logging.getLogger("codebasin").error(
"Configuration file does not have YAML file extension.",
)
sys.exit(1)
codebase, configuration = config.load(
config_file,
rootdir,
exclude_patterns=args.excludes,
filtered_platforms=filtered_platforms,
)
sys.exit(1)
codebase, configuration = config.load(
config_file,
rootdir,
exclude_patterns=args.excludes,
)

# Extend configuration with any additional platforms.
for p in additional_platforms:
name = os.path.splitext(os.path.basename(p))[0]
if name in codebase["platforms"]:
raise RuntimeError(f"Platform name {p} conflicts with {name}.")
db = config.load_database(p, rootdir)
configuration.update({name: db})

# Parse the source tree, and determine source line associations.
# The trees and associations are housed in state.
state = finder.find(rootdir, codebase, configuration)
legacy_warnings = True if config_file else False
state = finder.find(
rootdir,
codebase,
configuration,
legacy_warnings=legacy_warnings,
)

# Count lines for platforms
platform_mapper = PlatformMapper(codebase)
Expand Down Expand Up @@ -172,7 +236,11 @@ def report_enabled(name):

# Print clustering report
if report_enabled("clustering"):
output_prefix = os.path.realpath(guess_project_name(config_file))
if config_file is None:
platform_names = [p[0] for p in args.platforms]
output_prefix = "-".join(platform_names)
else:
output_prefix = os.path.realpath(guess_project_name(config_file))
clustering_output_name = output_prefix + "-dendrogram.png"
clustering = report.clustering(clustering_output_name, setmap)
if clustering is not None:
Expand Down
18 changes: 17 additions & 1 deletion codebasin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,13 @@ def load_platform(config, rootdir, platform_name):
return configuration


def load(config_file, rootdir, *, exclude_patterns=None):
def load(
config_file,
rootdir,
*,
exclude_patterns=None,
filtered_platforms=None,
):
"""
Load the configuration file into Python objects.
Return a (codebase, platform configuration) tuple of dicts.
Expand All @@ -597,6 +603,16 @@ def load(config_file, rootdir, *, exclude_patterns=None):

log.info("Platforms: %s", ", ".join(codebase["platforms"]))

# Limit the set of platforms in the codebase if requested.
if filtered_platforms:
for p in filtered_platforms:
if p not in codebase["platforms"]:
raise RuntimeError(
f"Platform {p} requested on the command line "
+ "does not exist in the configuration file.",
)
codebase["platforms"] = filtered_platforms

# Read each platform definition and populate platform configuration
# If files was empty, populate it with the files we find here
populate_files = not codebase["files"]
Expand Down
18 changes: 13 additions & 5 deletions codebasin/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,14 @@ def get_map(self, fn):
return self.maps[fn]


def find(rootdir, codebase, configuration, *, summarize_only=True):
def find(
rootdir,
codebase,
configuration,
*,
summarize_only=True,
legacy_warnings=True,
):
"""
Find codepaths in the files provided and return a mapping of source
lines to platforms.
Expand All @@ -141,10 +148,11 @@ def find(rootdir, codebase, configuration, *, summarize_only=True):
for e in configuration[p]:
if e["file"] not in codebase["files"]:
filename = e["file"]
log.warning(
f"{filename} found in definition of platform {p} "
+ "but missing from codebase",
)
if legacy_warnings:
log.warning(
f"{filename} found in definition of platform {p} "
+ "but missing from codebase",
)
state.insert_file(e["file"])

# Process each tree, by associating nodes with platforms
Expand Down

0 comments on commit ffb87d9

Please sign in to comment.