Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial implementation of RDF Patch parser. #2863

Merged
merged 17 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions examples/parse_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from rdflib import Dataset

# Two small RDF Patch documents: the first adds two triples about the blank
# node ``_:bn1``, the second deletes one of them again.
add_patch = """
TX .
A _:bn1 <http://example.org/predicate1> "object1" .
A _:bn1 <http://example.org/predicate2> "object2" .
TC .
"""

delete_patch = """
TX .
D _:bn1 <http://example.org/predicate1> "object1" .
TC .
"""

ds = Dataset()

# Apply the patches in order to the same dataset and show its contents after
# each one.
for heading, patch in (
    ("After add patch:", add_patch),
    ("After delete patch:", delete_patch),
):
    ds.parse(data=patch, format="patch")
    print(heading)
    for triple in ds.de_skolemize():
        print(triple)
recalcitrantsupplant marked this conversation as resolved.
Show resolved Hide resolved
8 changes: 8 additions & 0 deletions rdflib/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,14 @@ def plugins(
"HextuplesParser",
)

# Register RDF Patch Parsers
# Makes the parser class ``RDFPatchParser`` from the module
# ``rdflib.plugins.parsers.patch`` available under the format name "patch"
# (i.e. ``format="patch"`` when parsing).
register(
"patch",
Parser,
"rdflib.plugins.parsers.patch",
"RDFPatchParser",
)

# Register SPARQL Processors
register(
"sparql",
Expand Down
183 changes: 183 additions & 0 deletions rdflib/plugins/parsers/patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
from __future__ import annotations

from codecs import getreader
from enum import Enum
from typing import TYPE_CHECKING, Any, MutableMapping, Optional, Union

from rdflib.exceptions import ParserError as ParseError
from rdflib.graph import Dataset
from rdflib.parser import InputSource
from rdflib.plugins.parsers.nquads import NQuadsParser

# Build up from the NTriples parser:
from rdflib.plugins.parsers.ntriples import r_nodeid, r_tail, r_uriref, r_wspace
from rdflib.term import BNode, URIRef

if TYPE_CHECKING:
import typing_extensions as te

__all__ = ["RDFPatchParser", "Operation"]

_BNodeContextType = MutableMapping[str, BNode]


class Operation(Enum):
    """
    Operation codes that can start a line of an RDF Patch document.

    The value of each member is the code exactly as it is written in the
    patch text:

    - ``A``  -- `AddTripleOrQuad`: add a triple or quad.
    - ``D``  -- `DeleteTripleOrQuad`: delete a triple or quad.
    - ``PA`` -- `AddPrefix`: add a prefix.
    - ``PD`` -- `DeletePrefix`: delete a prefix.
    - ``TX`` -- `TransactionStart`: start a transaction.
    - ``TC`` -- `TransactionCommit`: commit a transaction.
    - ``TA`` -- `TransactionAbort`: abort a transaction.
    - ``H``  -- `Header`: a header line.
    """

    # Data changes
    AddTripleOrQuad = "A"
    DeleteTripleOrQuad = "D"

    # Prefix changes
    AddPrefix = "PA"
    DeletePrefix = "PD"

    # Transaction markers
    TransactionStart = "TX"
    TransactionCommit = "TC"
    TransactionAbort = "TA"

    # Patch header
    Header = "H"


class RDFPatchParser(NQuadsParser):
def parse( # type: ignore[override]
self,
inputsource: InputSource,
sink: Dataset,
bnode_context: Optional[_BNodeContextType] = None,
skolemize: bool = False,
**kwargs: Any,
) -> Dataset:
"""
Parse inputsource as an RDF Patch file.

The input is read line by line and each line is handed to `parsepatch`.

:type inputsource: `rdflib.parser.InputSource`
:param inputsource: the source of RDF Patch formatted data
:type sink: `rdflib.graph.Dataset`
:param sink: where to send parsed data
:type bnode_context: `dict`, optional
:param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances.
See `.W3CNTriplesParser.parse`
:type skolemize: `bool`, optional
:param skolemize: stored on the parser as ``self.skolemize``; this method does not read it itself.
:param kwargs: accepted but not used by this method.
:return: the `rdflib.graph.Dataset` created here on top of ``sink.store``
(a new object, not the ``sink`` argument itself).
:raises ParseError: if the source is not file-like, or if a line raises
`ParseError` while being processed (the offending line is included in the message).
"""
# The sink's store has to be context aware because patches can target named graphs.
assert sink.store.context_aware, (
"RDFPatchParser must be given" " a context aware store."
)
# type error: Incompatible types in assignment (expression has type "ConjunctiveGraph", base class "W3CNTriplesParser" defined the type as "Union[DummySink, NTGraphSink]")
self.sink: Dataset = Dataset(store=sink.store)
self.skolemize = skolemize
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And can remove these now after the changes in 0e75969?

Probably doesn't affect anything since it defaults to False in the super class.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I just mirrored the other parsers here. Could there be use cases where someone wants to skolemize when parsing?


# Prefer the character stream; otherwise fall back to the byte stream decoded as UTF-8.
source = inputsource.getCharacterStream()
if not source:
source = inputsource.getByteStream()
source = getreader("utf-8")(source)

if not hasattr(source, "read"):
raise ParseError("Item to parse must be a file-like object.")

self.file = source
self.buffer = ""
# Process one line per iteration until readline() returns None.
while True:
self.line = __line = self.readline()
if self.line is None:
break
try:
self.parsepatch(bnode_context)
# NOTE(review): only ParseError is wrapped with the offending line; the
# ValueError that operation() raises for an unrecognised code is not caught here.
except ParseError as msg:
raise ParseError("Invalid line (%s):\n%r" % (msg, __line))
return self.sink

def parsepatch(self, bnode_context: Optional[_BNodeContextType] = None) -> None:
    """
    Process the patch line currently held in ``self.line``.

    Empty lines and lines starting with ``#`` are ignored. Otherwise the
    leading operation code is consumed and the rest of the line is handed to
    the matching handler. Codes that have no handler here (header and
    transaction markers) are consumed and otherwise skipped.

    :param bnode_context: passed through to the triple/quad handler.
    """
    self.eat(r_wspace)
    # The spec says patches should not contain comments ("comments start #
    # and run to end of line"); such lines are tolerated and skipped.
    if not self.line or self.line.startswith("#"):
        return

    op = self.operation()
    self.eat(r_wspace)

    if op is Operation.AddPrefix:
        self.add_prefix()
    elif op is Operation.DeletePrefix:
        self.delete_prefix()
    elif op in (Operation.AddTripleOrQuad, Operation.DeleteTripleOrQuad):
        self.add_or_remove_triple_or_quad(op, bnode_context)

def add_or_remove_triple_or_quad(
    self, operation, bnode_context: Optional[_BNodeContextType] = None
) -> None:
    """
    Parse the remainder of an ``A`` or ``D`` line and apply it to the sink.

    The line is read as subject, predicate, object and an optional graph
    term. With a graph term the statement is added to / removed from that
    context of ``self.sink``; without one the default context is used.

    :param operation: the operation already read from the line; only
        ``Operation.AddTripleOrQuad`` and ``Operation.DeleteTripleOrQuad``
        have an effect.
    :param bnode_context: passed to the inherited subject/object parsers.
    :raises ParseError: if anything is left on the line after the statement.
    """
    self.eat(r_wspace)
    if not self.line or self.line.startswith("#"):
        return  # nothing to parse: empty line or comment

    # Terms are consumed strictly left to right; a blank node written in IRI
    # brackets is tried before the regular term parsers.
    subject = self.labeled_bnode() or self.subject(bnode_context)
    self.eat(r_wspace)
    predicate = self.predicate()
    self.eat(r_wspace)
    obj = self.labeled_bnode() or self.object(bnode_context)
    self.eat(r_wspace)
    context = self.labeled_bnode() or self.uriref() or self.nodeid(bnode_context)
    self.eat(r_tail)

    if self.line:
        raise ParseError("Trailing garbage")

    is_add = operation == Operation.AddTripleOrQuad
    if not is_add and operation != Operation.DeleteTripleOrQuad:
        return

    # A context aware store is required: a plain Graph would discard
    # anything whose context differs from its own identifier.
    statement = (subject, predicate, obj)
    if context:
        target = self.sink.get_context(context)
    else:
        target = self.sink.default_context

    if is_add:
        target.add(statement)
    else:
        target.remove(statement)

def add_prefix(self):
    """
    Handle the remainder of a ``PA`` line: bind a prefix on the sink.

    ``self.line`` is expected to hold ``<prefix> <namespace> <terminator>``,
    e.g. ``testing <http://example.org/> .``. Tokens may be separated by any
    amount of whitespace. Quotes around the prefix or namespace and angle
    brackets around the namespace are removed before binding; a quoted empty
    prefix (``""``) is kept as the empty prefix.

    :raises ParseError: if the line does not consist of exactly three tokens.
    """
    # Split on any whitespace run. Splitting on a single space would turn
    # tabs, repeated spaces or trailing blanks into a confusing unpacking
    # error.
    tokens = self.line.split()  # type: ignore[union-attr]
    if len(tokens) != 3:
        raise ParseError(
            "Malformed PA line: expected a prefix, a namespace and a terminator"
        )
    # Strip quotes per token (not across the whole line) so that an empty
    # quoted prefix survives tokenising and quotes inside an IRI are kept.
    prefix = tokens[0].strip("\"'")
    ns_stripped = tokens[1].strip("\"'").strip("<>")
    self.sink.bind(prefix, ns_stripped)
recalcitrantsupplant marked this conversation as resolved.
Show resolved Hide resolved

def delete_prefix(self):
    """
    Handle the remainder of a ``PD`` line: drop a prefix binding on the sink.

    Only the first token of ``self.line`` (the prefix, with surrounding
    quotes removed) is used. Any namespace token that follows is ignored, so
    both ``removed <http://example.org/> .`` and ``removed .`` are accepted,
    with tokens separated by any amount of whitespace.

    :raises ParseError: if the line contains no prefix token.
    """
    tokens = self.line.split()  # type: ignore[union-attr]
    if not tokens or tokens[0] == ".":
        raise ParseError("Malformed PD line: expected a prefix")
    prefix = tokens[0].strip("\"'")
    # NOTE(review): the binding is replaced with ``None`` rather than
    # removed; confirm against NamespaceManager.bind that this is the
    # intended way to drop a prefix.
    self.sink.namespace_manager.bind(prefix, None, replace=True)

def operation(self) -> Operation:
    """
    Read the operation code at the start of ``self.line`` and consume it.

    Members of `Operation` are tried in declaration order; the first one
    whose code the line starts with is removed from the line and returned.

    :return: the matching `Operation` member.
    :raises ValueError: if the line starts with none of the known codes.
    """
    found = next(
        (
            candidate
            for candidate in Operation
            if self.line.startswith(candidate.value)  # type: ignore[union-attr]
        ),
        None,
    )
    if found is None:
        raise ValueError(
            f'Invalid or no Operation found in line: "{self.line}". Valid Operations '
            f"codes are {', '.join([op.value for op in Operation])}"
        )
    self.eat_op(found.value)
    return found

def eat_op(self, op: str) -> None:
    """
    Remove the operation code ``op`` from the start of ``self.line``.

    Exactly one leading occurrence of ``op`` is removed. ``str.lstrip`` is
    deliberately avoided: it treats its argument as a *set of characters*
    and would keep stripping any following characters that happen to be in
    the code (for example ``"PAPA ..."`` with ``op="PA"``).

    :param op: the operation code text, e.g. ``"A"`` or ``"PA"``.
    """
    if self.line.startswith(op):  # type: ignore[union-attr]
        self.line = self.line[len(op) :]  # type: ignore[index]

def nodeid(
    self, bnode_context: Optional[_BNodeContextType] = None
) -> Union[te.Literal[False], BNode, URIRef]:
    """
    Parse a blank node label (``_:label``) at the start of the line.

    The label from the patch text is used directly as the blank node
    identifier; ``bnode_context`` is accepted but not consulted.

    :return: a `BNode` for the label, or ``False`` if the line does not
        start with ``_``.
    """
    if not self.peek("_"):
        return False
    label = self.eat(r_nodeid).group(1)
    return BNode(label)

def labeled_bnode(self):
    """
    Parse a blank node written inside IRI brackets, e.g. ``<_:bn1>``.

    :return: a `BNode` whose identifier is the label inside the brackets,
        or ``False`` if the line does not start with ``<_``.
    """
    if not self.peek("<_"):
        return False
    # Consume the whole bracketed term, then pull the label out of its
    # ``_:label`` content.
    bracket_content = self.eat(r_uriref).group(1)
    label_match = r_nodeid.match(bracket_content)
    return BNode(label_match.group(1))  # type: ignore[union-attr]
6 changes: 6 additions & 0 deletions test/data/patch/add_and_delete_bnode_triples.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
TX .
A _:bn1 <http://example.org/predicate1> "object1" <http://example.org/graph1> .
A _:bn1 <http://example.org/predicate2> "object2" <http://example.org/graph1> .
A _:bn1 <http://example.org/predicate3> "object3" <http://example.org/graph1> .
D _:bn1 <http://example.org/predicate2> "object2" <http://example.org/graph1> .
TC .
6 changes: 6 additions & 0 deletions test/data/patch/add_and_delete_labeled_bnode_quads.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
TX .
A <_:bn1> <http://example.org/predicate1> "object1" <http://example.org/graph1> .
A <_:bn1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
A <_:bn1> <http://example.org/predicate3> "object3" <http://example.org/graph1> .
D <_:bn1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
TC .
6 changes: 6 additions & 0 deletions test/data/patch/add_and_delete_prefix.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
TX .
PA present <http://some-other-ns#> .
PA removed <http://ns-for-prefix-to-remove#> .
PD removed <http://ns-for-prefix-to-remove#> .
A <http://ns-for-prefix-to-remove#test-subj> <http://ns-for-prefix-to-remove#test-pred> "object1" .
TC .
6 changes: 6 additions & 0 deletions test/data/patch/add_and_delete_triples.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
TX .
A <http://example.org/subject1> <http://example.org/predicate1> "object1" .
A <http://example.org/subject1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
D <http://example.org/subject1> <http://example.org/predicate1> "object1" .
D <http://example.org/subject1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/add_bnode_graph.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
A _:bn1 <http://example.org/predicate1> "object1" _:bn1 .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/add_bnode_quad.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
A _:bn1 <http://example.org/predicate1> "object1" <https://graph-1> .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/add_bnode_triple.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
A _:bn1 <http://example.org/predicate1> "object1" .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/add_bnode_uri.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
A <_:bn1> <http://example.org/predicate1> "object1" .
TC .
4 changes: 4 additions & 0 deletions test/data/patch/add_delete_bnode.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
TX .
A _:bn2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/object2> .
D _:bn2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/object2> .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/add_prefix.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
PA testing <http://example.org/> .
TC .
4 changes: 4 additions & 0 deletions test/data/patch/add_triple_and_quad.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
TX .
A <http://example.org/subject1> <http://example.org/predicate1> "object1" .
A <http://example.org/subject1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/delete_bnode_graph.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
D _:bn1 <http://example.org/predicate1> "object1" _:bn1 .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/delete_bnode_quad.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
D _:bn1 <http://example.org/predicate1> "object1" <https://graph-1> .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/delete_bnode_triple.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
D _:bn1 <http://example.org/predicate1> "object1" .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/delete_bnode_uri.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
D <_:bn1> <http://example.org/predicate1> "object1" .
TC .
Loading
Loading