diff --git a/docs/plugin_parsers.rst b/docs/plugin_parsers.rst index 7b3c2a568..56cb5d1eb 100644 --- a/docs/plugin_parsers.rst +++ b/docs/plugin_parsers.rst @@ -24,6 +24,7 @@ json-ld :class:`~rdflib.plugins.parsers.jsonld.JsonLDParser` hext :class:`~rdflib.plugins.parsers.hext.HextuplesParser` n3 :class:`~rdflib.plugins.parsers.notation3.N3Parser` nquads :class:`~rdflib.plugins.parsers.nquads.NQuadsParser` +patch :class:`~rdflib.plugins.parsers.patch.RDFPatchParser` nt :class:`~rdflib.plugins.parsers.ntriples.NTParser` trix :class:`~rdflib.plugins.parsers.trix.TriXParser` turtle :class:`~rdflib.plugins.parsers.notation3.TurtleParser` diff --git a/examples/parse_patch.py b/examples/parse_patch.py new file mode 100644 index 000000000..41a57fc70 --- /dev/null +++ b/examples/parse_patch.py @@ -0,0 +1,35 @@ +from rdflib import Dataset + + +def main(): + # RDF patch data + add_patch = """ + TX . + A _:bn1 "object1" . + A _:bn1 "object2" . + TC . + """ + + delete_patch = """ + TX . + D _:bn1 "object1" . + TC . + """ + + ds = Dataset() + + # Apply add patch + ds.parse(data=add_patch, format="patch") + print("After add patch:") + for triple in ds: + print(triple) + + # Apply delete patch + ds.parse(data=delete_patch, format="patch") + print("After delete patch:") + for triple in ds: + print(triple) + + +if __name__ == "__main__": + main() diff --git a/rdflib/plugin.py b/rdflib/plugin.py index 921f218a7..82b46ad86 100644 --- a/rdflib/plugin.py +++ b/rdflib/plugin.py @@ -488,6 +488,14 @@ def plugins( "HextuplesParser", ) +# Register RDF Patch Parsers +register( + "patch", + Parser, + "rdflib.plugins.parsers.patch", + "RDFPatchParser", +) + # Register SPARQL Processors register( "sparql", diff --git a/rdflib/plugins/parsers/patch.py b/rdflib/plugins/parsers/patch.py new file mode 100644 index 000000000..5e8f12d1f --- /dev/null +++ b/rdflib/plugins/parsers/patch.py @@ -0,0 +1,183 @@ +from __future__ import annotations + +from codecs import getreader +from enum import Enum +from typing import TYPE_CHECKING, Any, MutableMapping, Optional, Union + +from rdflib.exceptions import ParserError as ParseError +from rdflib.graph import Dataset +from rdflib.parser import InputSource +from rdflib.plugins.parsers.nquads import NQuadsParser + +# Build up from the NTriples parser: +from rdflib.plugins.parsers.ntriples import r_nodeid, r_tail, r_uriref, r_wspace +from rdflib.term import BNode, URIRef + +if TYPE_CHECKING: + import typing_extensions as te + +__all__ = ["RDFPatchParser", "Operation"] + +_BNodeContextType = MutableMapping[str, BNode] + + +class Operation(Enum): + """ + Enum of RDF Patch operations. + + Operations: + - `AddTripleOrQuad` (A): Adds a triple or quad. + - `DeleteTripleOrQuad` (D): Deletes a triple or quad. + - `AddPrefix` (PA): Adds a prefix. + - `DeletePrefix` (PD): Deletes a prefix. + - `TransactionStart` (TX): Starts a transaction. + - `TransactionCommit` (TC): Commits a transaction. + - `TransactionAbort` (TA): Aborts a transaction. + - `Header` (H): Specifies a header. + """ + + AddTripleOrQuad = "A" + DeleteTripleOrQuad = "D" + AddPrefix = "PA" + DeletePrefix = "PD" + TransactionStart = "TX" + TransactionCommit = "TC" + TransactionAbort = "TA" + Header = "H" + + +class RDFPatchParser(NQuadsParser): + def parse( # type: ignore[override] + self, + inputsource: InputSource, + sink: Dataset, + bnode_context: Optional[_BNodeContextType] = None, + skolemize: bool = False, + **kwargs: Any, + ) -> Dataset: + """ + Parse inputsource as an RDF Patch file. + + :type inputsource: `rdflib.parser.InputSource` + :param inputsource: the source of RDF Patch formatted data + :type sink: `rdflib.graph.Dataset` + :param sink: where to send parsed data + :type bnode_context: `dict`, optional + :param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances. + See `.W3CNTriplesParser.parse` + """ + assert sink.store.context_aware, ( + "RDFPatchParser must be given" " a context aware store." + ) + # type error: Incompatible types in assignment (expression has type "ConjunctiveGraph", base class "W3CNTriplesParser" defined the type as "Union[DummySink, NTGraphSink]") + self.sink: Dataset = Dataset(store=sink.store) + self.skolemize = skolemize + + source = inputsource.getCharacterStream() + if not source: + source = inputsource.getByteStream() + source = getreader("utf-8")(source) + + if not hasattr(source, "read"): + raise ParseError("Item to parse must be a file-like object.") + + self.file = source + self.buffer = "" + while True: + self.line = __line = self.readline() + if self.line is None: + break + try: + self.parsepatch(bnode_context) + except ParseError as msg: + raise ParseError("Invalid line (%s):\n%r" % (msg, __line)) + return self.sink + + def parsepatch(self, bnode_context: Optional[_BNodeContextType] = None) -> None: + self.eat(r_wspace) + # From spec: "No comments should be included (comments start # and run to end + # of line)." + if (not self.line) or self.line.startswith("#"): + return # The line is empty or a comment + + # if header, transaction, skip + operation = self.operation() + self.eat(r_wspace) + + if operation in [Operation.AddTripleOrQuad, Operation.DeleteTripleOrQuad]: + self.add_or_remove_triple_or_quad(operation, bnode_context) + elif operation == Operation.AddPrefix: + self.add_prefix() + elif operation == Operation.DeletePrefix: + self.delete_prefix() + + def add_or_remove_triple_or_quad( + self, operation, bnode_context: Optional[_BNodeContextType] = None + ) -> None: + self.eat(r_wspace) + if (not self.line) or self.line.startswith("#"): + return # The line is empty or a comment + + subject = self.labeled_bnode() or self.subject(bnode_context) + self.eat(r_wspace) + + predicate = self.predicate() + self.eat(r_wspace) + + obj = self.labeled_bnode() or self.object(bnode_context) + self.eat(r_wspace) + + context = self.labeled_bnode() or self.uriref() or self.nodeid(bnode_context) + self.eat(r_tail) + + if self.line: + raise ParseError("Trailing garbage") + # Must have a context aware store - add on a normal Graph + # discards anything where the ctx != graph.identifier + if operation == Operation.AddTripleOrQuad: + if context: + self.sink.get_context(context).add((subject, predicate, obj)) + else: + self.sink.default_context.add((subject, predicate, obj)) + elif operation == Operation.DeleteTripleOrQuad: + if context: + self.sink.get_context(context).remove((subject, predicate, obj)) + else: + self.sink.default_context.remove((subject, predicate, obj)) + + def add_prefix(self): + # Extract prefix and URI from the line + prefix, ns, _ = self.line.replace('"', "").replace("'", "").split(" ") # type: ignore[union-attr] + ns_stripped = ns.strip("<>") + self.sink.bind(prefix, ns_stripped) + + def delete_prefix(self): + prefix, _, _ = self.line.replace('"', "").replace("'", "").split(" ") # type: ignore[union-attr] + self.sink.namespace_manager.bind(prefix, None, replace=True) + + def operation(self) -> Operation: + for op in Operation: + if self.line.startswith(op.value): # type: ignore[union-attr] + self.eat_op(op.value) + return op + raise ValueError( + f'Invalid or no Operation found in line: "{self.line}". Valid Operations ' + f"codes are {', '.join([op.value for op in Operation])}" + ) + + def eat_op(self, op: str) -> None: + self.line = self.line.lstrip(op) # type: ignore[union-attr] + + def nodeid( + self, bnode_context: Optional[_BNodeContextType] = None + ) -> Union[te.Literal[False], BNode, URIRef]: + if self.peek("_"): + return BNode(self.eat(r_nodeid).group(1)) + return False + + def labeled_bnode(self): + if self.peek("<_"): + plain_uri = self.eat(r_uriref).group(1) + bnode_id = r_nodeid.match(plain_uri).group(1) # type: ignore[union-attr] + return BNode(bnode_id) + return False diff --git a/test/data/patch/add_and_delete_bnode_triples.rdp b/test/data/patch/add_and_delete_bnode_triples.rdp new file mode 100644 index 000000000..deef3c318 --- /dev/null +++ b/test/data/patch/add_and_delete_bnode_triples.rdp @@ -0,0 +1,6 @@ +TX . +A _:bn1 "object1" . +A _:bn1 "object2" . +A _:bn1 "object3" . +D _:bn1 "object2" . +TC . diff --git a/test/data/patch/add_and_delete_labeled_bnode_quads.rdp b/test/data/patch/add_and_delete_labeled_bnode_quads.rdp new file mode 100644 index 000000000..1180b69dd --- /dev/null +++ b/test/data/patch/add_and_delete_labeled_bnode_quads.rdp @@ -0,0 +1,6 @@ +TX . +A <_:bn1> "object1" . +A <_:bn1> "object2" . +A <_:bn1> "object3" . +D <_:bn1> "object2" . +TC . diff --git a/test/data/patch/add_and_delete_prefix.rdp b/test/data/patch/add_and_delete_prefix.rdp new file mode 100644 index 000000000..c92c3a2c0 --- /dev/null +++ b/test/data/patch/add_and_delete_prefix.rdp @@ -0,0 +1,6 @@ +TX . +PA present . +PA removed . +PD removed . +A "object1" . +TC . diff --git a/test/data/patch/add_and_delete_triples.rdp b/test/data/patch/add_and_delete_triples.rdp new file mode 100644 index 000000000..e403f1001 --- /dev/null +++ b/test/data/patch/add_and_delete_triples.rdp @@ -0,0 +1,6 @@ +TX . +A "object1" . +A "object2" . +D "object1" . +D "object2" . +TC . diff --git a/test/data/patch/add_bnode_graph.rdp b/test/data/patch/add_bnode_graph.rdp new file mode 100644 index 000000000..adf7f8d51 --- /dev/null +++ b/test/data/patch/add_bnode_graph.rdp @@ -0,0 +1,3 @@ +TX . +A _:bn1 "object1" _:bn1 . +TC . diff --git a/test/data/patch/add_bnode_quad.rdp b/test/data/patch/add_bnode_quad.rdp new file mode 100644 index 000000000..0426e88a2 --- /dev/null +++ b/test/data/patch/add_bnode_quad.rdp @@ -0,0 +1,3 @@ +TX . +A _:bn1 "object1" . +TC . diff --git a/test/data/patch/add_bnode_triple.rdp b/test/data/patch/add_bnode_triple.rdp new file mode 100644 index 000000000..c19601145 --- /dev/null +++ b/test/data/patch/add_bnode_triple.rdp @@ -0,0 +1,3 @@ +TX . +A _:bn1 "object1" . +TC . diff --git a/test/data/patch/add_bnode_uri.rdp b/test/data/patch/add_bnode_uri.rdp new file mode 100644 index 000000000..32b5aa270 --- /dev/null +++ b/test/data/patch/add_bnode_uri.rdp @@ -0,0 +1,3 @@ +TX . +A <_:bn1> "object1" . +TC . diff --git a/test/data/patch/add_delete_bnode.rdp b/test/data/patch/add_delete_bnode.rdp new file mode 100644 index 000000000..4b5e0591b --- /dev/null +++ b/test/data/patch/add_delete_bnode.rdp @@ -0,0 +1,4 @@ +TX . +A _:bn2 . +D _:bn2 . +TC . diff --git a/test/data/patch/add_prefix.rdp b/test/data/patch/add_prefix.rdp new file mode 100644 index 000000000..d3becb272 --- /dev/null +++ b/test/data/patch/add_prefix.rdp @@ -0,0 +1,3 @@ +TX . +PA testing . +TC . diff --git a/test/data/patch/add_triple_and_quad.rdp b/test/data/patch/add_triple_and_quad.rdp new file mode 100644 index 000000000..fa82d221a --- /dev/null +++ b/test/data/patch/add_triple_and_quad.rdp @@ -0,0 +1,4 @@ +TX . +A "object1" . +A "object2" . +TC . diff --git a/test/data/patch/delete_bnode_graph.rdp b/test/data/patch/delete_bnode_graph.rdp new file mode 100644 index 000000000..f79a12d9f --- /dev/null +++ b/test/data/patch/delete_bnode_graph.rdp @@ -0,0 +1,3 @@ +TX . +D _:bn1 "object1" _:bn1 . +TC . diff --git a/test/data/patch/delete_bnode_quad.rdp b/test/data/patch/delete_bnode_quad.rdp new file mode 100644 index 000000000..9974879de --- /dev/null +++ b/test/data/patch/delete_bnode_quad.rdp @@ -0,0 +1,3 @@ +TX . +D _:bn1 "object1" . +TC . diff --git a/test/data/patch/delete_bnode_triple.rdp b/test/data/patch/delete_bnode_triple.rdp new file mode 100644 index 000000000..aa3225831 --- /dev/null +++ b/test/data/patch/delete_bnode_triple.rdp @@ -0,0 +1,3 @@ +TX . +D _:bn1 "object1" . +TC . diff --git a/test/data/patch/delete_bnode_uri.rdp b/test/data/patch/delete_bnode_uri.rdp new file mode 100644 index 000000000..a2958f23d --- /dev/null +++ b/test/data/patch/delete_bnode_uri.rdp @@ -0,0 +1,3 @@ +TX . +D <_:bn1> "object1" . +TC . diff --git a/test/test_parsers/test_parser_patch.py b/test/test_parsers/test_parser_patch.py new file mode 100644 index 000000000..2bdb1425c --- /dev/null +++ b/test/test_parsers/test_parser_patch.py @@ -0,0 +1,146 @@ +import os + +import pytest + +from rdflib import BNode, Dataset, URIRef +from test.data import TEST_DATA_DIR + +TEST_BASE = os.path.join(TEST_DATA_DIR, "patch") + + +class TestPatchParser: + def test_01(self): + ds = Dataset() + nq_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/add_triple_and_quad.rdp"), os.curdir + ) + with open(nq_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 2 + + def test_02(self): + ds = Dataset() + nq_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/add_and_delete_bnode_triples.rdp"), + os.curdir, + ) + with open(nq_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 2 + + def test_03(self): + ds = Dataset() + nq_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/add_and_delete_triples.rdp"), os.curdir + ) + with open(nq_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 0 + + def test_04(self): + ds = Dataset() + nq_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/add_prefix.rdp"), os.curdir + ) + with open(nq_path, "rb") as data: + ds.parse(data, format="patch") + namespaces = [tup[0] for tup in (ds.namespaces())] + assert "testing" in namespaces + + def test_05(self): + ds = Dataset() + nq_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/add_and_delete_prefix.rdp"), os.curdir + ) + with open(nq_path, "rb") as data: + ds.parse(data, format="patch") + namespaces = [tup[0] for tup in (ds.namespaces())] + assert "present" in namespaces + assert ( + "@prefix removed: ." not in ds.serialize() + ) + + def test_06(self): + ds = Dataset() + add_bnode_triple_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/add_bnode_triple.rdp"), os.curdir + ) + with open(add_bnode_triple_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 1 + delete_bnode_triple_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/delete_bnode_triple.rdp"), os.curdir + ) + with open(delete_bnode_triple_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 0 + + @pytest.mark.xfail(reason="De skolemization is undone by ConjunctiveGraph") + def test_07(self): + ds = Dataset() + add_bnode_triple_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/add_bnode_triple.rdp"), os.curdir + ) + with open(add_bnode_triple_path, "rb") as data: + ds.parse(data, format="patch") + # test will pass if changed to `for t in ds.de_skolemize():`: + for t in ds: + assert BNode("bn1") in t + assert ( + URIRef("https://rdflib.github.io/.well-known/genid/rdflib/bn1") not in t + ) + + def test_08(self): + ds = Dataset() + add_bnode_quad_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/add_bnode_quad.rdp"), os.curdir + ) + with open(add_bnode_quad_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 1 + delete_bnode_quad_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/delete_bnode_quad.rdp"), os.curdir + ) + with open(delete_bnode_quad_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 0 + + def test_09(self): + ds = Dataset() + add_bnode_graph_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/add_bnode_graph.rdp"), os.curdir + ) + with open(add_bnode_graph_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 1 + delete_bnode_graph_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/delete_bnode_graph.rdp"), os.curdir + ) + with open(delete_bnode_graph_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 0 + + def test_10(self): + ds = Dataset() + add_bnode_uri_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/add_bnode_uri.rdp"), os.curdir + ) + with open(add_bnode_uri_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 1 + delete_bnode_uri_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/delete_bnode_uri.rdp"), os.curdir + ) + with open(delete_bnode_uri_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 0 + + def test_11(self): + ds = Dataset() + nq_path = os.path.relpath( + os.path.join(TEST_DATA_DIR, "patch/add_and_delete_labeled_bnode_quads.rdp"), + os.curdir, + ) + with open(nq_path, "rb") as data: + ds.parse(data, format="patch") + assert len(ds) == 2