Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

The long-awaited standalone SHACL Rule Expander mode #260

Merged
merged 11 commits into from
Oct 11, 2024
Merged
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Python PEP 440 Versioning](https://www.python.org/d
## [Unreleased]

### Added
- SHACL Rules Expander Mode
- A new alternative Run Mode for PySHACL
- PySHACL will not validate the DataGraph against Shapes and Constraints; instead, it will simply run all SHACL-AF Rules to expand the DataGraph.
- By default it will output a new graph containing the existing DataGraph Triples plus the expanded triples
- Run with inplace mode to expand the new triples directly into the input DataGraph
- Focus Node Filtering
- You can now pass in a list of focus nodes to the validator, and it will only validate those focus nodes.
- Note, you still need to pass in a SHACL Shapes Graph, and the shapes still need to target the focus nodes.
Expand All @@ -19,6 +24,9 @@ and this project adheres to [Python PEP 440 Versioning](https://www.python.org/d
- If you give the validator a list of Shapes to use, and a list of focus nodes, the validator will operate in
a highly-targeted mode, it feeds those focus nodes directly into those given Shapes for validation.
- In this mode, the selected SHACL Shape does not need to specify any focus-targeting mechanisms of its own.
- Combined Rules Expander Mode with Shape Selection
- The combination of SHACL Rules Expander Mode and Shape Selection will allow specialised workflows.
- For example, you can run specific expansion rules from a SHACL Shapes File, based on the new triples required.

### Changed
- Don't make a clone of the DataGraph if the input data graph is ephemeral.
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ COPY . .
RUN chown -R pyshacl:pyshacl /home/pyshacl /app && chmod -R 775 /home/pyshacl /app
USER pyshacl
ENV PATH="/home/pyshacl/.local/bin:$PATH"
RUN pip3 install "poetry>=1.5.0,<2.0"
RUN pip3 install "poetry>=1.8.3,<2.0"
RUN poetry install --no-dev --extras "js http"
USER root
RUN apk del build-dependencies
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ dev-coverage = ["pytest-cov", "coverage", "platformdirs"]

[tool.poetry.scripts]
pyshacl = "pyshacl.cli:main"
pyshacl_rules = "pyshacl.cli_rules:main"
pyshacl_validate = "pyshacl.cli:main"
pyshacl_server = "pyshacl.http:cli"

Expand Down
6 changes: 4 additions & 2 deletions pyshacl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# -*- coding: latin-1 -*-
#
from .entrypoints import shacl_rules, validate
from .rule_expand_runner import RuleExpandRunner
from .shape import Shape
from .shapes_graph import ShapesGraph
from .validate import Validator, validate
from .validator import Validator

# version compliant with https://www.python.org/dev/peps/pep-0440/
__version__ = '0.26.0'
# Don't forget to change the version number in pyproject.toml, Dockerfile, and CITATION.cff along with this one

__all__ = ['validate', 'Validator', '__version__', 'Shape', 'ShapesGraph']
__all__ = ['validate', 'shacl_rules', 'Validator', 'RuleExpandRunner', '__version__', 'Shape', 'ShapesGraph']
13 changes: 9 additions & 4 deletions pyshacl/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import os
import sys

from pyshacl.cli import main
from pyshacl.cli import main as validate_main
from pyshacl.cli_rules import main as rules_main


def str_is_true(s_var: str):
Expand All @@ -16,11 +17,15 @@ def str_is_true(s_var: str):
do_server = os.getenv("PYSHACL_HTTP", "")
do_server = os.getenv("PYSHACL_SERVER", do_server)

if (len(sys.argv) > 1 and str(sys.argv[1]).lower() in ('serve', 'server', '--server')) or (
first_arg = None if len(sys.argv) < 2 else sys.argv[1]

if first_arg is not None and str(first_arg).lower() in ('rules', '--rules'):
rules_main(prog="python3 -m pyshacl")
elif (first_arg is not None and str(first_arg).lower() in ('serve', 'server', '--server')) or (
do_server and str_is_true(do_server)
):
from pyshacl.sh_http import main as http_main

http_main()

main(prog="python3 -m pyshacl")
else:
validate_main(prog="python3 -m pyshacl")
12 changes: 12 additions & 0 deletions pyshacl/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,13 @@ def str_is_true(s_var: str):
help='Send output to a file (defaults to stdout).',
default=sys.stdout,
)
parser.add_argument(
'--rules',
help='Ignore validation options, run PySHACL in Rules Expansion mode. Same as `pyshacl_rules`.',
action='store_true',
dest='do_rules',
default=False,
)
parser.add_argument(
'--server',
help='Ignore all the rest of the options, start the HTTP Server. Same as `pyshacl_server`.',
Expand All @@ -240,6 +247,11 @@ def main(prog: Union[str, None] = None) -> None:

# http_main calls sys.exit(0) and never returns
http_main()
if args.do_rules:
from pyshacl.cli_rules import main as rules_main

# rules_main calls sys.exit(0) and never returns
rules_main()
if not args.data:
# No datafile give, and not starting in server mode.
sys.stderr.write('Input Error. No DataGraph file or endpoint supplied.\n')
Expand Down
274 changes: 274 additions & 0 deletions pyshacl/cli_rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import sys
from io import BufferedReader
from typing import Union, cast

from pyshacl import __version__, shacl_rules
from pyshacl.cli import ShowVersion
from pyshacl.errors import (
ConstraintLoadError,
ReportableRuntimeError,
RuleLoadError,
ShapeLoadError,
ValidationFailure,
)

# RDF serialisation names accepted by every "*-format" option of this tool.
_RDF_FORMAT_CHOICES = ('auto', 'turtle', 'xml', 'trig', 'json-ld', 'nt', 'n3', 'nquads')

# Command line definition for the SHACL Rules Expander. The option names, dests
# and defaults mirror the validator CLI where an equivalent option exists, but
# the help texts describe rule expansion rather than validation.
parser = argparse.ArgumentParser(
    description='PySHACL {} SHACL Rules Expander command line tool.'.format(str(__version__))
)

# --- Input graphs ---------------------------------------------------------
parser.add_argument(
    'data',
    metavar='DataGraph',
    help='The file or endpoint containing the Target Data Graph.',
    default=None,
    nargs='?',
)
parser.add_argument(
    '-s',
    '--shapes',
    '--shacl',
    dest='shacl',
    action='store',
    nargs='?',
    help='A file containing the SHACL Shapes Graph.',
)
parser.add_argument(
    '-e',
    '--ont-graph',
    dest='ont',
    action='store',
    nargs='?',
    help='A file path or URL to a document containing extra ontological information. '
    'RDFS and OWL definitions from this are used to inoculate the DataGraph.',
)

# --- Behaviour switches ---------------------------------------------------
parser.add_argument(
    '-i',
    '--inference',
    dest='inference',
    action='store',
    default='none',
    choices=('none', 'rdfs', 'owlrl', 'both'),
    help='Choose a type of inferencing to run against the Data Graph before running the SHACL Rules.',
)
parser.add_argument(
    '-im',
    '--imports',
    dest='imports',
    action='store_true',
    default=False,
    help='Allow import of sub-graphs defined in statements with owl:imports.',
)
parser.add_argument(
    '-j',
    '--js',
    dest='js',
    action='store_true',
    default=False,
    help='Enable features from the SHACL-JS Specification.',
)
parser.add_argument(
    '-it',
    '--iterate-rules',
    dest='iterate_rules',
    action='store_true',
    default=False,
    help="Run Shape's SHACL Rules iteratively until the data_graph reaches a steady state.",
)
parser.add_argument(
    '-d',
    '--debug',
    dest='debug',
    action='store_true',
    default=False,
    help='Output additional verbose runtime messages.',
)

# --- Targeting ------------------------------------------------------------
parser.add_argument(
    '--focus',
    dest='focus',
    action='store',
    help='Optional IRIs of focus nodes from the DataGraph, the shapes will be applied to only these nodes. '
    'Comma-separated list.',
    nargs="?",
    default=None,
)
parser.add_argument(
    '--shape',
    dest='shape',
    action='store',
    help='Optional IRIs of a NodeShape or PropertyShape from the SHACL ShapesGraph, '
    'only these shapes will be used to expand the DataGraph. Comma-separated list.',
    nargs="?",
    default=None,
)

# --- Serialisation formats ------------------------------------------------
parser.add_argument(
    '-f',
    '--format',
    dest='format',
    action='store',
    help='Choose an output format. Default is "trig" for Datasets and "turtle" for Graphs.',
    default='auto',
    choices=_RDF_FORMAT_CHOICES,
)
parser.add_argument(
    '-df',
    '--data-file-format',
    dest='data_file_format',
    action='store',
    help='Explicitly state the RDF File format of the input DataGraph file. Default="auto".',
    default='auto',
    choices=_RDF_FORMAT_CHOICES,
)
parser.add_argument(
    '-sf',
    '--shacl-file-format',
    dest='shacl_file_format',
    action='store',
    help='Explicitly state the RDF File format of the input SHACL file. Default="auto".',
    default='auto',
    choices=_RDF_FORMAT_CHOICES,
)
parser.add_argument(
    '-ef',
    '--ont-file-format',
    dest='ont_file_format',
    action='store',
    help='Explicitly state the RDF File format of the extra ontology file. Default="auto".',
    default='auto',
    choices=_RDF_FORMAT_CHOICES,
)

# Capital -V is deliberate: it matches the flag used by the validator CLI.
parser.add_argument('-V', '--version', action=ShowVersion, help='Show PySHACL version and exit.')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason this is capital -V not lowercase -v?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's so it matches the -V param in the regular (validator) pyshacl cli tool. It has always used the capital -V. I don't remember the original reason.

# Destination for the serialized, expanded graph.
#  - `const=sys.stdout` makes a bare `-o` (no filename) fall back to stdout
#    instead of leaving `args.output` as None.
#  - The file is opened as UTF-8 because the serialized graph is decoded from
#    UTF-8 before being written; the locale default encoding may not be able to
#    represent every character in it.
parser.add_argument(
    '-o',
    '--output',
    dest='output',
    nargs='?',
    type=argparse.FileType('w', encoding='utf-8'),
    help='Send output to a file (defaults to stdout).',
    default=sys.stdout,
    const=sys.stdout,
)
# No explicit -h/--help option is registered: argparse adds one automatically.


def _rules_cli_argv(raw_args: list, delegated: bool) -> list:
    """Return *raw_args* without the tokens that only selected Rules mode.

    ``--rules`` is never an option of this parser, so it is always removed.
    A leading bare ``rules`` word is removed only when *delegated* is true
    (the caller passed ``prog``, as ``python -m pyshacl`` does); otherwise it
    is kept, because it could be the name of the DataGraph file.
    """
    remaining = list(raw_args)
    if delegated and remaining and str(remaining[0]).lower() == 'rules':
        remaining = remaining[1:]
    return [tok for tok in remaining if str(tok).lower() != '--rules']


def _split_iri_list(raw: str) -> list:
    """Split a comma-separated CLI value into stripped, non-empty entries."""
    return [part.strip() for part in raw.split(',') if part.strip()]


def main(prog: Union[str, None] = None) -> None:
    """Run the SHACL Rules Expander command line tool.

    Parses ``sys.argv``, opens the DataGraph file, calls ``shacl_rules`` with
    the selected options and writes the serialized result to the chosen
    output. This function always terminates the process via ``sys.exit``.

    :param prog: Optional program name shown in usage/help output. When given,
        a leading ``rules`` mode-selector word in the arguments is skipped.

    Exit codes:
        0 - the expanded graph was written.
        1 - input error (no/unreadable DataGraph) or a ValidationFailure.
        2 - Shape/Constraint/Rule load error or a runtime error.
        3 - a required feature is not implemented.
    """
    if prog is not None and len(prog) > 0:
        parser.prog = prog

    # The dispatcher that selected Rules mode leaves its selector token in
    # sys.argv; this parser does not know it, so strip it before parsing.
    args = parser.parse_args(_rules_cli_argv(sys.argv[1:], bool(prog)))
    if not args.data:
        # No DataGraph was given, so there is nothing to expand.
        sys.stderr.write('Input Error. No DataGraph file or endpoint supplied.\n')
        parser.print_usage(sys.stderr)
        sys.exit(1)

    data_graph: Union[BufferedReader, str]
    try:
        data_file = open(args.data, 'rb')
    except FileNotFoundError:
        sys.stderr.write('Input Error. DataGraph file not found.\n')
        sys.exit(1)
    except PermissionError:
        sys.stderr.write('Input Error. DataGraph file not readable.\n')
        sys.exit(1)
    except OSError as open_err:
        # e.g. the path is a directory; report it instead of a raw traceback.
        sys.stderr.write('Input Error. DataGraph file could not be opened: {}\n'.format(open_err))
        sys.exit(1)
    # NOTE: This cast is not necessary in Python >= 3.10.
    data_graph = cast(BufferedReader, data_file)

    runner_kwargs: dict = {
        'debug': args.debug,
        'serialize_expanded_graph': True,
    }
    if args.shacl is not None:
        runner_kwargs['shacl_graph'] = args.shacl
    if args.ont is not None:
        runner_kwargs['ont_graph'] = args.ont
    if args.inference != 'none':
        runner_kwargs['inference'] = args.inference
    if args.imports:
        runner_kwargs['do_owl_imports'] = True
    if args.js:
        runner_kwargs['js'] = True
    if args.iterate_rules:
        runner_kwargs['iterate_rules'] = True
    if args.focus:
        focus_nodes = _split_iri_list(args.focus)
        if focus_nodes:
            runner_kwargs['focus_nodes'] = focus_nodes
    if args.shape:
        shape_iris = _split_iri_list(args.shape)
        if shape_iris:
            runner_kwargs['use_shapes'] = shape_iris
    # "auto" means "let the library decide", so it is not forwarded.
    for arg_name, kwarg_name in (
        ('shacl_file_format', 'shacl_graph_format'),
        ('ont_file_format', 'ont_graph_format'),
        ('data_file_format', 'data_graph_format'),
        ('format', 'serialize_expanded_graph_format'),
    ):
        chosen_format = getattr(args, arg_name)
        if chosen_format and chosen_format != 'auto':
            runner_kwargs[kwarg_name] = chosen_format

    # A bare `-o` may leave the destination unset; fall back to stdout.
    out_stream = args.output if args.output is not None else sys.stdout

    exit_code: Union[int, None] = None
    output_txt = None
    try:
        output_txt = shacl_rules(data_graph, **runner_kwargs)
        # The runner may hand back an exception object instead of raising it.
        if isinstance(output_txt, BaseException):
            raise output_txt
    except ValidationFailure as vf:
        out_stream.write("Rules Runner generated a Validation Failure result:\n")
        out_stream.write(str(vf.message))
        out_stream.write("\n")
        exit_code = 1
    except ShapeLoadError as sle:
        sys.stderr.write("Rules Runner encountered a Shape Load Error:\n")
        sys.stderr.write(str(sle))
        sys.stderr.write("\n")
        exit_code = 2
    except ConstraintLoadError as cle:
        sys.stderr.write("Rules Runner encountered a Constraint Load Error:\n")
        sys.stderr.write(str(cle))
        sys.stderr.write("\n")
        exit_code = 2
    except RuleLoadError as rle:
        sys.stderr.write("Rules Runner encountered a Rule Load Error:\n")
        sys.stderr.write(str(rle))
        sys.stderr.write("\n")
        exit_code = 2
    except ReportableRuntimeError as rre:
        sys.stderr.write("Rules Runner encountered a Runtime Error:\n")
        sys.stderr.write(str(rre.message))
        sys.stderr.write("\nIf you believe this is a bug in pyshacl, open an Issue on the pyshacl github page.\n")
        exit_code = 2
    except NotImplementedError as nie:
        sys.stderr.write("Rules Runner feature is not implemented:\n")
        if len(nie.args) > 0:
            sys.stderr.write(str(nie.args[0]))
        else:
            sys.stderr.write("No message provided.")
        sys.stderr.write("\nIf your use-case requires this feature, open an Issue on the pyshacl github page.\n")
        exit_code = 3
    except RuntimeError as runtime_err:
        import traceback

        traceback.print_tb(runtime_err.__traceback__)
        sys.stderr.write(
            "\n\nRules Runner encountered a Runtime Error. Please report this to the PySHACL issue tracker.\n"
        )
        exit_code = 2
    finally:
        try:
            data_file.close()
        except Exception as close_err:
            sys.stderr.write("Error closing data file:\n")
            sys.stderr.write(str(close_err))
    if exit_code is not None:
        sys.exit(exit_code)

    if isinstance(output_txt, bytes):
        output_unicode = output_txt.decode('utf-8')
    else:
        output_unicode = output_txt
    out_stream.write(output_unicode)
    if out_stream is sys.stdout or out_stream is sys.__stdout__:
        # Do not close the process-wide stdout; callers may still need it.
        out_stream.flush()
    else:
        out_stream.close()
    sys.exit(0)


if __name__ == "__main__":
    main()
Loading