From bdea4c99823b235b92931face775a8f9a0ef93f2 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 23 Jan 2024 14:17:58 -0800 Subject: [PATCH 01/10] Fix incorrect URLs in JSON schema Signed-off-by: John Pennycook --- codebasin/schema/compilation-database.schema | 2 +- codebasin/schema/config.schema | 2 +- codebasin/schema/importcfg.schema | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/codebasin/schema/compilation-database.schema b/codebasin/schema/compilation-database.schema index 85bc963..549d29e 100644 --- a/codebasin/schema/compilation-database.schema +++ b/codebasin/schema/compilation-database.schema @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/intel/code-base-investigator/schema/compilation-database.schema", + "$id": "https://raw.githubusercontent.com/intel/code-base-investigator/main/codebasin/schema/compilation-database.schema", "title": "Compilation Database", "description": "Compilation database format used by many projects.", "type": "array", diff --git a/codebasin/schema/config.schema b/codebasin/schema/config.schema index 94b270c..a3f8d70 100644 --- a/codebasin/schema/config.schema +++ b/codebasin/schema/config.schema @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/intel/code-base-investigator/schema/config.schema", + "$id": "https://raw.githubusercontent.com/intel/code-base-investigator/main/codebasin/schema/config.schema", "title": "Code Base Investigator Configuration File", "description": "Lists codebase files and compilation options", "type": "object", diff --git a/codebasin/schema/importcfg.schema b/codebasin/schema/importcfg.schema index b7f1b57..bd24bef 100644 --- a/codebasin/schema/importcfg.schema +++ b/codebasin/schema/importcfg.schema @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": 
"https://raw.githubusercontent.com/intel/code-base-investigator/schema/importcfg.schema", + "$id": "https://raw.githubusercontent.com/intel/code-base-investigator/main/codebasin/schema/importcfg.schema", "title": "Code Base Investigator Import Configuration File", "description": "Configuration options for importing commands.", "type": "object", From f6fbb7397dedd034887ca493b61fe7604c44927e Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 23 Jan 2024 14:28:54 -0800 Subject: [PATCH 02/10] Add copy of P3 coverage schema Signed-off-by: John Pennycook --- codebasin/schema/coverage-0.1.0.schema | 37 ++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 codebasin/schema/coverage-0.1.0.schema diff --git a/codebasin/schema/coverage-0.1.0.schema b/codebasin/schema/coverage-0.1.0.schema new file mode 100644 index 0000000..6eccb1e --- /dev/null +++ b/codebasin/schema/coverage-0.1.0.schema @@ -0,0 +1,37 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/intel/p3-analysis-library/main/p3/data/coverage-0.1.0.schema", + "title": "Coverage", + "description": "Lines of code used in each file of a code base.", + "type": "array", + "items": { + "type": "object", + "properties": { + "file": { + "type": "string" + }, + "regions": { + "type": "array", + "items": { + "type": "array", + "prefixItems": [ + { + "type": "integer" + }, + { + "type": "integer" + }, + { + "type": "integer" + } + ], + "items": false + } + } + }, + "required": [ + "file", + "regions" + ] + } +} From 9f7dabb0ff53e5db59ca276468a03aea99bb19f7 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 23 Jan 2024 14:31:04 -0800 Subject: [PATCH 03/10] Add JSON schema files to the codebasin package Signed-off-by: John Pennycook --- MANIFEST.in | 4 ++++ setup.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..a6c7123 
--- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +include codebasin/schema/compilation-database.schema +include codebasin/schema/config.schema +include codebasin/schema/coverage-0.1.0.schema +include codebasin/schema/importcfg.schema diff --git a/setup.py b/setup.py index 5d348bb..79a7f68 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,8 @@ author='John Pennycook', author_email='john.pennycook@intel.com', url='https://www.github.com/intel/code-base-investigator', - packages=['codebasin', 'codebasin.walkers'], + packages=['codebasin', 'codebasin.schema', 'codebasin.walkers'], + include_package_data=True, scripts=['codebasin.py'], classifiers=['Development Status :: 3 - Alpha', 'Environment :: Console', From fadae33c4de1c00a3f978556dbd40689b81acc75 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 23 Jan 2024 14:34:05 -0800 Subject: [PATCH 04/10] Fix formatting of setup.py Signed-off-by: John Pennycook --- setup.py | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/setup.py b/setup.py index 79a7f68..b6ac285 100644 --- a/setup.py +++ b/setup.py @@ -4,25 +4,30 @@ from setuptools import setup -setup(name='codebasin', - version='1.1.0', - description='Code Base Investigator', - author='John Pennycook', - author_email='john.pennycook@intel.com', - url='https://www.github.com/intel/code-base-investigator', - packages=['codebasin', 'codebasin.schema', 'codebasin.walkers'], - include_package_data=True, - scripts=['codebasin.py'], - classifiers=['Development Status :: 3 - Alpha', - 'Environment :: Console', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python', - 'Topic :: Software Development'], - python_requires='>=3.9', - install_requires=['numpy', - 'matplotlib', - 'pyyaml', - 'scipy>=1.11.1', - 'jsonschema'] - ) +setup( + name="codebasin", + version="1.1.0", + description="Code Base Investigator", + author="John Pennycook", + 
author_email="john.pennycook@intel.com", + url="https://www.github.com/intel/code-base-investigator", + packages=["codebasin", "codebasin.schema", "codebasin.walkers"], + include_package_data=True, + scripts=["codebasin.py"], + classifiers=[ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python", + "Topic :: Software Development", + ], + python_requires=">=3.9", + install_requires=[ + "numpy", + "matplotlib", + "pyyaml", + "scipy>=1.11.1", + "jsonschema", + ], +) From 57d532f688a450a97a2e60037f58a3c758d9093c Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 23 Jan 2024 15:11:49 -0800 Subject: [PATCH 05/10] Simplify schema usage and validation - Loads schema files from within the package. - Uses utility functions instead of duplicating validation code. - Deprecates old utility functions. Signed-off-by: John Pennycook --- codebasin/config.py | 128 +----------------------------------------- codebasin/util.py | 133 +++++++++++++++++++++++++++++++------------- etc/coverage.py | 2 +- 3 files changed, 98 insertions(+), 165 deletions(-) diff --git a/codebasin/config.py b/codebasin/config.py index e9cb19a..e22079b 100644 --- a/codebasin/config.py +++ b/codebasin/config.py @@ -8,128 +8,17 @@ import collections import glob import itertools as it -import json import logging import os import re import shlex -import jsonschema import yaml from codebasin import util log = logging.getLogger("codebasin") -_compiledb_schema_id = ( - "https://raw.githubusercontent.com/intel/" - "code-base-investigator/schema/compilation-database.schema" -) -_compiledb_schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": _compiledb_schema_id, - "title": "Compilation Database", - "description": "Compilation database format used by many projects.", - "type": "array", - "items": { - "type": "object", - "properties": { - "directory": {"type": 
"string"}, - "arguments": {"type": "array", "items": {"type": "string"}}, - "file": {"type": "string"}, - "command": {"type": "string"}, - "output": {"type": "string"}, - }, - "anyOf": [ - { - "required": [ - "arguments", - ], - }, - { - "required": [ - "command", - ], - }, - ], - }, -} - -_config_schema_id = ( - "https://raw.githubusercontent.com/intel/" - "code-base-investigator/schema/config.schema" -) - -_config_schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": _config_schema_id, - "title": "Code Base Investigator Configuration File", - "description": "Lists codebase files and compilation options", - "type": "object", - "properties": { - "codebase": { - "type": "object", - "properties": { - "files": {"type": "array", "items": {"type": "string"}}, - "platforms": {"type": "array", "items": {"type": "string"}}, - "exclude_files": { - "type": "array", - "items": {"type": "string"}, - }, - }, - "required": ["files", "platforms"], - }, - }, - "patternProperties": { - ".*": { - "type": "object", - "properties": { - "files": {"type": "array", "items": {"type": "string"}}, - "defines": {"type": "array", "items": {"type": "string"}}, - "include_paths": { - "type": "array", - "items": {"type": "string"}, - }, - "commands": {"type": "string"}, - }, - "anyOf": [{"required": ["files"]}, {"required": ["commands"]}], - }, - }, - "additionalProperties": False, - "required": ["codebase"], -} - -_importcfg_schema_id = ( - "https://raw.githubusercontent.com/intel/", - "code-base-investigator/schema/importcfg.schema", -) - -_importcfg_schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": _importcfg_schema_id, - "title": "Code Base Investigator Import Configuration File", - "description": "Configuration options for importing commands.", - "type": "object", - "properties": { - "compilers": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - }, - "options": {"type": "array", 
"items": {"type": "string"}}, - }, - "required": ["name", "options"], - "additionalProperties": False, - }, - }, - }, - "required": ["compilers"], - "additionalProperties": False, -} - def extract_defines(args): """ @@ -292,7 +181,7 @@ def load_importcfg(): log.info(f"Found import configuration file at {path}") with open(path) as f: try: - _importcfg_json = json.load(f) + _importcfg_json = util._load_json(f, "importcfg") for compiler in _importcfg_json["compilers"]: _importcfg[compiler["name"]] = compiler["options"] except BaseException: @@ -513,14 +402,7 @@ def load_database(dbpath, rootdir): represented as a compilation database entry. """ with util.safe_open_read_nofollow(dbpath, "r") as fi: - db = json.load(fi) - - # Validate compilation database against schema - try: - jsonschema.validate(instance=db, schema=_compiledb_schema) - except Exception: - msg = "Compilation database failed schema validation" - raise ValueError(msg) + db = util._load_json(fi, schema_name="compiledb") configuration = [] for e in db: @@ -690,11 +572,7 @@ def load(config_file, rootdir): # Validate config against a schema # We don't use any advanced features of YAML, so can use JSON here - try: - jsonschema.validate(instance=config, schema=_config_schema) - except Exception: - msg = "Configuration file failed schema validation" - raise ValueError(msg) + util._validate_json(config, schema_name="config") # Read codebase definition if "codebase" in config: diff --git a/codebasin/util.py b/codebasin/util.py index 607a396..008c5ea 100644 --- a/codebasin/util.py +++ b/codebasin/util.py @@ -11,42 +11,14 @@ import json import logging import os +import pkgutil +import typing +import warnings from collections.abc import Iterable from os.path import splitext import jsonschema -_coverage_schema_id = ( - "https://raw.githubusercontent.com/intel/" - "p3-analysis-library/p3/schema/coverage-0.1.0.schema" -) -_coverage_schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": 
_coverage_schema_id, - "title": "Coverage", - "description": "Lines of code used in each file of a code base.", - "type": "array", - "items": { - "type": "object", - "properties": { - "file": {"type": "string"}, - "regions": { - "type": "array", - "items": { - "type": "array", - "prefixItems": [ - {"type": "integer"}, - {"type": "integer"}, - {"type": "integer"}, - ], - "items": False, - }, - }, - }, - "required": ["file", "regions"], - }, -} - log = logging.getLogger("codebasin") @@ -141,6 +113,91 @@ def valid_path(path): return valid +def _validate_json(json_object: str, schema_name: str) -> bool: + """ + Validate JSON against a schema. + + Parameters + ---------- + json_object : Object + The JSON to validate. + + schema_name : {'compiledb', 'config', 'coverage', 'importcfg'} + The schema to validate against. + + Returns + ------- + bool + True if the JSON is valid. + + Raises + ------ + ValueError + If the JSON fails to validate, or the schema name is unrecognized. + + RuntimeError + If the schema file cannot be located. 
+ """ + schema_paths = { + "compiledb": "schema/compilation-database.schema", + "config": "schema/config.schema", + "coverage": "schema/coverage-0.1.0.schema", + "importcfg": "schema/importcfg.schema", + } + if schema_name not in schema_paths.keys(): + raise ValueError("Unrecognized schema name.") + + schema_path = schema_paths[schema_name] + schema_string = pkgutil.get_data("codebasin", schema_path) + if not schema_string: + msg = f"Could not locate schema file {schema_path}" + raise RuntimeError(msg) + + schema = json.loads(schema_string) + print(schema) + + try: + jsonschema.validate(instance=json_object, schema=schema) + except jsonschema.exceptions.ValidationError: + msg = f"JSON failed schema validation against {schema_path}" + raise ValueError(msg) + except jsonschema.exceptions.SchemaError: + msg = f"{schema_path} is not a valid schema" + raise RuntimeError(msg) + + return True + + +def _load_json(file_object: typing.TextIO, schema_name: str) -> object: + """ + Load JSON from file and validate it against a schema. + + Parameters + ---------- + file_object : typing.TextIO + The file object to load from. + + schema_name : {'compiledb', 'config', 'coverage', 'importcfg'} + The schema to validate against. + + Returns + ------- + Object + The loaded JSON. + + Raises + ------ + ValueError + If the JSON fails to validate, or the schema name is unrecognized. + + RuntimeError + If the schema file cannot be located. + """ + json_object = json.load(file_object) + _validate_json(json_object, schema_name) + return json_object + + def validate_coverage_json(json_string: str) -> bool: """ Validate coverage JSON string against schema. @@ -163,15 +220,13 @@ def validate_coverage_json(json_string: str) -> bool: TypeError If the JSON string is not a string. 
""" + warnings.warn( + "Direct access to JSON validation is deprecated.", + DeprecationWarning, + ) + if not isinstance(json_string, str): raise TypeError("Coverage must be a JSON string.") instance = json.loads(json_string) - - try: - jsonschema.validate(instance=instance, schema=_coverage_schema) - except Exception: - msg = "Coverage string failed schema validation" - raise ValueError(msg) - - return True + return _validate_json(instance, "coverage") diff --git a/etc/coverage.py b/etc/coverage.py index f53a852..dc83843 100644 --- a/etc/coverage.py +++ b/etc/coverage.py @@ -66,7 +66,7 @@ for region in exports[p][filename]: covobject["regions"].append(region) covarray.append(covobject) + util._validate_json(covarray, "coverage") json_string = json.dumps(covarray) - util.validate_coverage_json(json_string) with open(covpath, "w") as fp: fp.write(json_string) From 7dd17f5b3622ad9a59967868ae0d4ce02198225b Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 23 Jan 2024 15:23:55 -0800 Subject: [PATCH 06/10] Fix compilation database and importcfg schema Loading from file highlighted that we had extra commas. 
Signed-off-by: John Pennycook --- codebasin/schema/compilation-database.schema | 8 ++++---- codebasin/schema/importcfg.schema | 4 ++-- codebasin/util.py | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/codebasin/schema/compilation-database.schema b/codebasin/schema/compilation-database.schema index 549d29e..7c9593b 100644 --- a/codebasin/schema/compilation-database.schema +++ b/codebasin/schema/compilation-database.schema @@ -24,17 +24,17 @@ }, "output": { "type": "string" - }, + } }, "anyOf": [ { "required": [ - "arguments", - ], + "arguments" + ] }, { "required": [ - "command", + "command" ] } ] diff --git a/codebasin/schema/importcfg.schema b/codebasin/schema/importcfg.schema index bd24bef..5675171 100644 --- a/codebasin/schema/importcfg.schema +++ b/codebasin/schema/importcfg.schema @@ -11,7 +11,7 @@ "type": "object", "properties": { "name": { - "type": "string", + "type": "string" }, "options": { "type": "array", @@ -21,7 +21,7 @@ } }, "required": ["name", "options"], - "additionalProperties": false, + "additionalProperties": false } } }, diff --git a/codebasin/util.py b/codebasin/util.py index 008c5ea..681976a 100644 --- a/codebasin/util.py +++ b/codebasin/util.py @@ -154,7 +154,6 @@ def _validate_json(json_object: str, schema_name: str) -> bool: raise RuntimeError(msg) schema = json.loads(schema_string) - print(schema) try: jsonschema.validate(instance=json_object, schema=schema) From 2455ce0e3ed196eefa3421a5d6b7fd15e2064551 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 24 Jan 2024 08:07:51 -0800 Subject: [PATCH 07/10] Convert tuples in coverage to lists Validating the JSON object directly (without first dumping and reloading a JSON string) requires us to follow the schema more strictly. Previously, the call to json.dumps() was converting tuples to lists for us. 
Signed-off-by: John Pennycook --- etc/coverage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/coverage.py b/etc/coverage.py index dc83843..ecc6019 100644 --- a/etc/coverage.py +++ b/etc/coverage.py @@ -64,7 +64,7 @@ for filename in exports[p]: covobject = {"file": filename, "regions": []} for region in exports[p][filename]: - covobject["regions"].append(region) + covobject["regions"].append(list(region)) covarray.append(covobject) util._validate_json(covarray, "coverage") json_string = json.dumps(covarray) From 1842cb969bb41dddf8311294583bd641f6eac604 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 26 Jan 2024 07:21:44 -0800 Subject: [PATCH 08/10] Fix _validate_json type annotation Signed-off-by: John Pennycook --- codebasin/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebasin/util.py b/codebasin/util.py index 681976a..9fc1337 100644 --- a/codebasin/util.py +++ b/codebasin/util.py @@ -113,7 +113,7 @@ def valid_path(path): return valid -def _validate_json(json_object: str, schema_name: str) -> bool: +def _validate_json(json_object: object, schema_name: str) -> bool: """ Validate JSON against a schema. 
From 7256a84cb7719054ce09c18ae506f67380b6e7c5 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 26 Jan 2024 07:27:26 -0800 Subject: [PATCH 09/10] Add separate function for validating YAML Signed-off-by: John Pennycook --- codebasin/config.py | 3 +-- codebasin/util.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/codebasin/config.py b/codebasin/config.py index e22079b..bab0f01 100644 --- a/codebasin/config.py +++ b/codebasin/config.py @@ -571,8 +571,7 @@ def load(config_file, rootdir): raise RuntimeError(f"Could not open {config_file!s}.") # Validate config against a schema - # We don't use any advanced features of YAML, so can use JSON here - util._validate_json(config, schema_name="config") + util._validate_yaml(config, schema_name="config") # Read codebase definition if "codebase" in config: diff --git a/codebasin/util.py b/codebasin/util.py index 9fc1337..8d31b39 100644 --- a/codebasin/util.py +++ b/codebasin/util.py @@ -167,6 +167,38 @@ def _validate_json(json_object: object, schema_name: str) -> bool: return True +def _validate_yaml(yaml_object: object, schema_name: str) -> bool: + """ + Validate YAML against a schema. + + Parameters + ---------- + yaml_object : Object + The YAML to validate. + + schema_name : {'config'} + The schema to validate against. + + Returns + ------- + bool + True if the YAML is valid. + + Raises + ------ + ValueError + If the YAML fails to validate, or the schema name is unrecognized. + + RuntimeError + If the schema file cannot be located. + """ + if schema_name != "config": + raise ValueError("Unrecognized schema name.") + + # We don't use any advanced features of YAML, so can use JSON here + return _validate_json(yaml_object, schema_name) + + def _load_json(file_object: typing.TextIO, schema_name: str) -> object: """ Load JSON from file and validate it against a schema. 
From 814b1ec9d2ce075d052826b394ea7c8ea6da5e56 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 26 Jan 2024 11:31:06 -0800 Subject: [PATCH 10/10] Bump version to 1.1.1 Signed-off-by: John Pennycook --- codebasin.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/codebasin.py b/codebasin.py index 239c02a..f6b2bb8 100755 --- a/codebasin.py +++ b/codebasin.py @@ -13,7 +13,7 @@ from codebasin import config, finder, report, util from codebasin.walkers.platform_mapper import PlatformMapper -version = "1.1.0" +version = "1.1.1" def report_enabled(name): diff --git a/setup.py b/setup.py index b6ac285..6ce65a3 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="codebasin", - version="1.1.0", + version="1.1.1", description="Code Base Investigator", author="John Pennycook", author_email="john.pennycook@intel.com",