From e404cb420260d5fab4827de806bf86decb563d46 Mon Sep 17 00:00:00 2001 From: Xuguang Song Date: Wed, 14 Sep 2022 19:05:02 +1000 Subject: [PATCH 01/11] rdfstar parsers and serializers update --- rdflib/plugin.py | 36 + rdflib/plugins/parsers/ntriples-star.py | 2387 ++++++++++++++++ rdflib/plugins/parsers/trigstar.py | 2438 ++++++++++++++++ rdflib/plugins/parsers/turtlestar.py | 2452 +++++++++++++++++ rdflib/plugins/serializers/ntriples-star.py | 527 ++++ rdflib/plugins/serializers/trigstar.py | 552 ++++ rdflib/plugins/serializers/turtlestar.py | 547 ++++ rdflib/term.py | 90 + .../ntriples-star-bad-syntax-1.nt | 1 + .../ntriples-star-bad-syntax-2.nt | 1 + .../ntriples-star-bad-syntax-3.nt | 1 + .../ntriples-star-bad-syntax-4.nt | 1 + test/ntriples-star/ntriples-star-bnode-1.nt | 2 + test/ntriples-star/ntriples-star-bnode-2.nt | 2 + test/ntriples-star/ntriples-star-nested-1.nt | 3 + test/ntriples-star/ntriples-star-nested-2.nt | 3 + test/ntriples-star/ntriples-star-syntax-1.nt | 1 + test/ntriples-star/ntriples-star-syntax-2.nt | 1 + test/ntriples-star/ntriples-star-syntax-3.nt | 1 + test/ntriples-star/ntriples-star-syntax-4.nt | 1 + test/ntriples-star/ntriples-star-syntax-5.nt | 1 + test/rdf-star/test_Rdfstar.py | 210 ++ .../turtle-star/nt-ttl-star-bad-syntax-1.ttl | 1 + .../turtle-star/nt-ttl-star-bad-syntax-2.ttl | 1 + .../turtle-star/nt-ttl-star-bad-syntax-3.ttl | 1 + .../turtle-star/nt-ttl-star-bad-syntax-4.ttl | 1 + .../turtle-star/nt-ttl-star-bnode-1.ttl | 2 + .../turtle-star/nt-ttl-star-bnode-2.ttl | 2 + .../turtle-star/nt-ttl-star-nested-1.ttl | 3 + .../turtle-star/nt-ttl-star-nested-2.ttl | 3 + .../turtle-star/nt-ttl-star-syntax-1.ttl | 1 + .../turtle-star/nt-ttl-star-syntax-2.ttl | 1 + .../turtle-star/nt-ttl-star-syntax-3.ttl | 1 + .../turtle-star/nt-ttl-star-syntax-4.ttl | 1 + .../turtle-star/nt-ttl-star-syntax-5.ttl | 1 + .../turtle-star/turtle-star-annotation-1.ttl | 3 + .../turtle-star/turtle-star-annotation-2.ttl | 10 + 
.../turtle-star/turtle-star-syntax-bad-01.ttl | 4 + .../turtle-star/turtle-star-syntax-bad-02.ttl | 4 + .../turtle-star/turtle-star-syntax-bad-03.ttl | 4 + .../turtle-star/turtle-star-syntax-bad-04.ttl | 4 + .../turtle-star/turtle-star-syntax-bad-05.ttl | 3 + .../turtle-star/turtle-star-syntax-bad-06.ttl | 4 + .../turtle-star/turtle-star-syntax-bad-07.ttl | 3 + .../turtle-star/turtle-star-syntax-bad-08.ttl | 3 + .../turtle-star-syntax-bad-ann-1.ttl | 6 + .../turtle-star-syntax-bad-ann-2.ttl | 3 + .../turtle-star-syntax-basic-01.ttl | 4 + .../turtle-star-syntax-basic-02.ttl | 4 + .../turtle-star-syntax-bnode-01.ttl | 4 + .../turtle-star-syntax-bnode-02.ttl | 4 + .../turtle-star-syntax-bnode-03.ttl | 3 + .../turtle-star-syntax-compound.ttl | 11 + .../turtle-star-syntax-inside-01.ttl | 4 + .../turtle-star-syntax-inside-02.ttl | 5 + .../turtle-star-syntax-nested-01.ttl | 7 + .../turtle-star-syntax-nested-02.ttl | 5 + test/test_parser_ntriplesstar.py | 96 + test/test_parser_trigstar.py | 148 + test/test_parser_turtlestar.py | 219 ++ test/trig-star/trig-star-annotation-1.trig | 3 + test/trig-star/trig-star-annotation-2.trig | 12 + test/trig-star/trig-star-syntax-bad-01.trig | 6 + test/trig-star/trig-star-syntax-bad-02.trig | 6 + test/trig-star/trig-star-syntax-bad-03.trig | 6 + test/trig-star/trig-star-syntax-bad-04.trig | 6 + test/trig-star/trig-star-syntax-bad-05.trig | 5 + test/trig-star/trig-star-syntax-bad-06.trig | 5 + test/trig-star/trig-star-syntax-bad-07.trig | 4 + test/trig-star/trig-star-syntax-bad-08.trig | 3 + .../trig-star/trig-star-syntax-bad-ann-1.trig | 4 + .../trig-star/trig-star-syntax-bad-ann-2.trig | 3 + test/trig-star/trig-star-syntax-basic-01.trig | 6 + test/trig-star/trig-star-syntax-basic-02.trig | 6 + test/trig-star/trig-star-syntax-bnode-01.trig | 6 + test/trig-star/trig-star-syntax-bnode-02.trig | 6 + test/trig-star/trig-star-syntax-bnode-03.trig | 3 + test/trig-star/trig-star-syntax-compound.trig | 12 + 
.../trig-star/trig-star-syntax-inside-01.trig | 6 + .../trig-star/trig-star-syntax-inside-02.trig | 7 + .../trig-star/trig-star-syntax-nested-01.trig | 9 + .../trig-star/trig-star-syntax-nested-02.trig | 7 + test/turtle-star/nt-ttl-star-bad-syntax-1.ttl | 1 + test/turtle-star/nt-ttl-star-bad-syntax-2.ttl | 1 + test/turtle-star/nt-ttl-star-bad-syntax-3.ttl | 1 + test/turtle-star/nt-ttl-star-bad-syntax-4.ttl | 1 + test/turtle-star/nt-ttl-star-bnode-1.ttl | 2 + test/turtle-star/nt-ttl-star-bnode-2.ttl | 2 + test/turtle-star/nt-ttl-star-nested-1.ttl | 3 + test/turtle-star/nt-ttl-star-nested-2.ttl | 3 + test/turtle-star/nt-ttl-star-syntax-1.ttl | 1 + test/turtle-star/nt-ttl-star-syntax-2.ttl | 1 + test/turtle-star/nt-ttl-star-syntax-3.ttl | 1 + test/turtle-star/nt-ttl-star-syntax-4.ttl | 1 + test/turtle-star/nt-ttl-star-syntax-5.ttl | 1 + test/turtle-star/test1234.ttl | 23 + test/turtle-star/test_rdfliteral.ttl | 5 + test/turtle-star/turtle-star-annotation-1.ttl | 3 + test/turtle-star/turtle-star-annotation-2.ttl | 10 + .../turtle-star/turtle-star-syntax-bad-01.ttl | 14 + .../turtle-star/turtle-star-syntax-bad-02.ttl | 4 + .../turtle-star/turtle-star-syntax-bad-03.ttl | 4 + .../turtle-star/turtle-star-syntax-bad-04.ttl | 4 + .../turtle-star/turtle-star-syntax-bad-05.ttl | 3 + .../turtle-star/turtle-star-syntax-bad-06.ttl | 4 + .../turtle-star/turtle-star-syntax-bad-07.ttl | 3 + .../turtle-star/turtle-star-syntax-bad-08.ttl | 3 + .../turtle-star-syntax-bad-ann-1.ttl | 6 + .../turtle-star-syntax-bad-ann-2.ttl | 3 + .../turtle-star-syntax-basic-01.ttl | 4 + .../turtle-star-syntax-basic-02.ttl | 4 + .../turtle-star-syntax-bnode-01.ttl | 4 + .../turtle-star-syntax-bnode-02.ttl | 4 + .../turtle-star-syntax-bnode-03.ttl | 3 + .../turtle-star-syntax-compound.ttl | 11 + .../turtle-star-syntax-inside-01.ttl | 4 + .../turtle-star-syntax-inside-02.ttl | 5 + .../turtle-star-syntax-nested-01.ttl | 7 + .../turtle-star-syntax-nested-02.ttl | 5 + test_serializer_ntriplesstar.py | 56 + 
test_serializer_trigstar.py | 68 + test_serializer_turtlestar.py | 104 + 122 files changed, 10360 insertions(+) create mode 100644 rdflib/plugins/parsers/ntriples-star.py create mode 100644 rdflib/plugins/parsers/trigstar.py create mode 100644 rdflib/plugins/parsers/turtlestar.py create mode 100644 rdflib/plugins/serializers/ntriples-star.py create mode 100644 rdflib/plugins/serializers/trigstar.py create mode 100644 rdflib/plugins/serializers/turtlestar.py create mode 100644 test/ntriples-star/ntriples-star-bad-syntax-1.nt create mode 100644 test/ntriples-star/ntriples-star-bad-syntax-2.nt create mode 100644 test/ntriples-star/ntriples-star-bad-syntax-3.nt create mode 100644 test/ntriples-star/ntriples-star-bad-syntax-4.nt create mode 100644 test/ntriples-star/ntriples-star-bnode-1.nt create mode 100644 test/ntriples-star/ntriples-star-bnode-2.nt create mode 100644 test/ntriples-star/ntriples-star-nested-1.nt create mode 100644 test/ntriples-star/ntriples-star-nested-2.nt create mode 100644 test/ntriples-star/ntriples-star-syntax-1.nt create mode 100644 test/ntriples-star/ntriples-star-syntax-2.nt create mode 100644 test/ntriples-star/ntriples-star-syntax-3.nt create mode 100644 test/ntriples-star/ntriples-star-syntax-4.nt create mode 100644 test/ntriples-star/ntriples-star-syntax-5.nt create mode 100644 test/rdf-star/test_Rdfstar.py create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-1.ttl create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-2.ttl create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-3.ttl create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-4.ttl create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bnode-1.ttl create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bnode-2.ttl create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-nested-1.ttl create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-nested-2.ttl create mode 100644 
test/rdf-star/turtle-star/nt-ttl-star-syntax-1.ttl create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-syntax-2.ttl create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-syntax-3.ttl create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-syntax-4.ttl create mode 100644 test/rdf-star/turtle-star/nt-ttl-star-syntax-5.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-annotation-1.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-annotation-2.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-01.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-02.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-03.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-04.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-05.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-06.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-07.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-08.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-1.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-2.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-basic-01.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-basic-02.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bnode-01.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bnode-02.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bnode-03.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-compound.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-inside-01.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-inside-02.ttl create mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-nested-01.ttl create mode 100644 
test/rdf-star/turtle-star/turtle-star-syntax-nested-02.ttl create mode 100644 test/test_parser_ntriplesstar.py create mode 100644 test/test_parser_trigstar.py create mode 100644 test/test_parser_turtlestar.py create mode 100644 test/trig-star/trig-star-annotation-1.trig create mode 100644 test/trig-star/trig-star-annotation-2.trig create mode 100644 test/trig-star/trig-star-syntax-bad-01.trig create mode 100644 test/trig-star/trig-star-syntax-bad-02.trig create mode 100644 test/trig-star/trig-star-syntax-bad-03.trig create mode 100644 test/trig-star/trig-star-syntax-bad-04.trig create mode 100644 test/trig-star/trig-star-syntax-bad-05.trig create mode 100644 test/trig-star/trig-star-syntax-bad-06.trig create mode 100644 test/trig-star/trig-star-syntax-bad-07.trig create mode 100644 test/trig-star/trig-star-syntax-bad-08.trig create mode 100644 test/trig-star/trig-star-syntax-bad-ann-1.trig create mode 100644 test/trig-star/trig-star-syntax-bad-ann-2.trig create mode 100644 test/trig-star/trig-star-syntax-basic-01.trig create mode 100644 test/trig-star/trig-star-syntax-basic-02.trig create mode 100644 test/trig-star/trig-star-syntax-bnode-01.trig create mode 100644 test/trig-star/trig-star-syntax-bnode-02.trig create mode 100644 test/trig-star/trig-star-syntax-bnode-03.trig create mode 100644 test/trig-star/trig-star-syntax-compound.trig create mode 100644 test/trig-star/trig-star-syntax-inside-01.trig create mode 100644 test/trig-star/trig-star-syntax-inside-02.trig create mode 100644 test/trig-star/trig-star-syntax-nested-01.trig create mode 100644 test/trig-star/trig-star-syntax-nested-02.trig create mode 100644 test/turtle-star/nt-ttl-star-bad-syntax-1.ttl create mode 100644 test/turtle-star/nt-ttl-star-bad-syntax-2.ttl create mode 100644 test/turtle-star/nt-ttl-star-bad-syntax-3.ttl create mode 100644 test/turtle-star/nt-ttl-star-bad-syntax-4.ttl create mode 100644 test/turtle-star/nt-ttl-star-bnode-1.ttl create mode 100644 
test/turtle-star/nt-ttl-star-bnode-2.ttl create mode 100644 test/turtle-star/nt-ttl-star-nested-1.ttl create mode 100644 test/turtle-star/nt-ttl-star-nested-2.ttl create mode 100644 test/turtle-star/nt-ttl-star-syntax-1.ttl create mode 100644 test/turtle-star/nt-ttl-star-syntax-2.ttl create mode 100644 test/turtle-star/nt-ttl-star-syntax-3.ttl create mode 100644 test/turtle-star/nt-ttl-star-syntax-4.ttl create mode 100644 test/turtle-star/nt-ttl-star-syntax-5.ttl create mode 100644 test/turtle-star/test1234.ttl create mode 100644 test/turtle-star/test_rdfliteral.ttl create mode 100644 test/turtle-star/turtle-star-annotation-1.ttl create mode 100644 test/turtle-star/turtle-star-annotation-2.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bad-01.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bad-02.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bad-03.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bad-04.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bad-05.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bad-06.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bad-07.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bad-08.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bad-ann-1.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bad-ann-2.ttl create mode 100644 test/turtle-star/turtle-star-syntax-basic-01.ttl create mode 100644 test/turtle-star/turtle-star-syntax-basic-02.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bnode-01.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bnode-02.ttl create mode 100644 test/turtle-star/turtle-star-syntax-bnode-03.ttl create mode 100644 test/turtle-star/turtle-star-syntax-compound.ttl create mode 100644 test/turtle-star/turtle-star-syntax-inside-01.ttl create mode 100644 test/turtle-star/turtle-star-syntax-inside-02.ttl create mode 100644 test/turtle-star/turtle-star-syntax-nested-01.ttl create mode 
100644 test/turtle-star/turtle-star-syntax-nested-02.ttl create mode 100644 test_serializer_ntriplesstar.py create mode 100644 test_serializer_trigstar.py create mode 100644 test_serializer_turtlestar.py diff --git a/rdflib/plugin.py b/rdflib/plugin.py index 9d2f8540b..501bb41c8 100644 --- a/rdflib/plugin.py +++ b/rdflib/plugin.py @@ -264,6 +264,24 @@ def plugins( "rdflib.plugins.serializers.n3", "N3Serializer", ) +register( + "ntstar", + Serializer, + "rdflib.plugins.serializers.ntriples-star", + "NtriplesStarSerializer" +) +register( + "ttlstar", + Serializer, + "rdflib.plugins.serializers.turtlestar", + "TurtlestarSerializer" +) +register( + "trigstar", + Serializer, + "rdflib.plugins.serializers.trigstar", + "TrigstarSerializer" +) register( "text/turtle", Serializer, @@ -394,6 +412,24 @@ def plugins( "rdflib.plugins.parsers.notation3", "N3Parser", ) +register( + "ntstar", + Parser, + "rdflib.plugins.parsers.ntriples-star", + "NtriplesStarParser", +) +register( + "ttls", + Parser, + "rdflib.plugins.parsers.turtlestar", + "TurtleParser", +) +register( + "trigs", + Parser, + "rdflib.plugins.parsers.trigstar", + "TrigParser", +) register( "text/turtle", Parser, diff --git a/rdflib/plugins/parsers/ntriples-star.py b/rdflib/plugins/parsers/ntriples-star.py new file mode 100644 index 000000000..ec550cc2a --- /dev/null +++ b/rdflib/plugins/parsers/ntriples-star.py @@ -0,0 +1,2387 @@ +#!/usr/bin/env python +""" +notation3.py - Standalone Notation3 Parser +Derived from CWM, the Closed World Machine +Authors of the original suite: +* Dan Connolly <@@> +* Tim Berners-Lee <@@> +* Yosi Scharf <@@> +* Joseph M. Reagle Jr. +* Rich Salz +http://www.w3.org/2000/10/swap/notation3.py +Copyright 2000-2007, World Wide Web Consortium. +Copyright 2001, MIT. +Copyright 2001, Zolera Systems Inc. +License: W3C Software License +http://www.w3.org/Consortium/Legal/copyright-software +Modified by Sean B. Palmer +Copyright 2007, Sean B. Palmer. 
+Modified to work with rdflib by Gunnar Aastrand Grimnes +Copyright 2010, Gunnar A. Grimnes +""" +import codecs +import os +import re +import sys +import rdflib + +# importing typing for `typing.List` because `List`` is used for something else +import typing +from decimal import Decimal +from typing import IO, TYPE_CHECKING, Any, Callable, Dict, Optional, TypeVar, Union +from uuid import uuid4 + +from rdflib.compat import long_type +from rdflib.exceptions import ParserError +from rdflib.graph import ConjunctiveGraph, Graph, QuotedGraph +from rdflib.term import ( + _XSD_PFX, + RdfstarTriple, + BNode, + Identifier, + Literal, + Node, + URIRef, + Variable, + _unique_id, +) + +__all__ = [ + "BadSyntax", + "N3Parser", + "NtriplesStarParser", + "splitFragP", + "join", + "base", + "runNamespace", + "uniqueURI", + "hexify", +] + +from rdflib.parser import Parser + +if TYPE_CHECKING: + from rdflib.parser import InputSource + +AnyT = TypeVar("AnyT") + + +def splitFragP(uriref, punct=0): + """split a URI reference before the fragment + Punctuation is kept. + e.g. + >>> splitFragP("abc#def") + ('abc', '#def') + >>> splitFragP("abcdef") + ('abcdef', '') + """ + + i = uriref.rfind("#") + if i >= 0: + return uriref[:i], uriref[i:] + else: + return uriref, "" + + +def join(here, there): + """join an absolute URI and URI reference + (non-ascii characters are supported/doctested; + haven't checked the details of the IRI spec though) + ``here`` is assumed to be absolute. + ``there`` is URI reference. + >>> join('http://example/x/y/z', '../abc') + 'http://example/x/abc' + Raise ValueError if there uses relative path + syntax but here has no hierarchical path. + >>> join('mid:foo@example', '../foo') # doctest: +NORMALIZE_WHITESPACE + Traceback (most recent call last): + raise ValueError(here) + ValueError: Base has no slash + after colon - with relative '../foo'. 
+ >>> join('http://example/x/y/z', '') + 'http://example/x/y/z' + >>> join('mid:foo@example', '#foo') + 'mid:foo@example#foo' + We grok IRIs + >>> len(u'Andr\\xe9') + 5 + >>> join('http://example.org/', u'#Andr\\xe9') + u'http://example.org/#Andr\\xe9' + """ + + # assert(here.find("#") < 0), \ + # "Base may not contain hash: '%s'" % here # why must caller splitFrag? + + slashl = there.find("/") + colonl = there.find(":") + + # join(base, 'foo:/') -- absolute + if colonl >= 0 and (slashl < 0 or colonl < slashl): + return there + + bcolonl = here.find(":") + assert bcolonl >= 0, ( + "Base uri '%s' is not absolute" % here + ) # else it's not absolute + + path, frag = splitFragP(there) + if not path: + return here + frag + + # join('mid:foo@example', '../foo') bzzt + if here[bcolonl + 1 : bcolonl + 2] != "/": + raise ValueError( + "Base <%s> has no slash after " + "colon - with relative '%s'." % (here, there) + ) + + if here[bcolonl + 1 : bcolonl + 3] == "//": + bpath = here.find("/", bcolonl + 3) + else: + bpath = bcolonl + 1 + + # join('http://xyz', 'foo') + if bpath < 0: + bpath = len(here) + here = here + "/" + + # join('http://xyz/', '//abc') => 'http://abc' + if there[:2] == "//": + return here[: bcolonl + 1] + there + + # join('http://xyz/', '/abc') => 'http://xyz/abc' + if there[:1] == "/": + return here[:bpath] + there + + slashr = here.rfind("/") + + while 1: + if path[:2] == "./": + path = path[2:] + if path == ".": + path = "" + elif path[:3] == "../" or path == "..": + path = path[3:] + i = here.rfind("/", bpath, slashr) + if i >= 0: + here = here[: i + 1] + slashr = i + else: + break + + return here[: slashr + 1] + path + frag + + +def base(): + """The base URI for this process - the Web equiv of cwd + Relative or absolute unix-standard filenames parsed relative to + this yield the URI of the file. 
+ If we had a reliable way of getting a computer name, + we should put it in the hostname just to prevent ambiguity + """ + # return "file://" + hostname + os.getcwd() + "/" + return "file://" + _fixslash(os.getcwd()) + "/" + + +def _fixslash(s): + """Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" + s = s.replace("\\", "/") + if s[0] != "/" and s[1] == ":": + s = s[2:] # @@@ Hack when drive letter present + return s + + +CONTEXT = 0 +PRED = 1 +SUBJ = 2 +OBJ = 3 + +PARTS = PRED, SUBJ, OBJ +ALL4 = CONTEXT, PRED, SUBJ, OBJ + +SYMBOL = 0 +FORMULA = 1 +LITERAL = 2 +LITERAL_DT = 21 +LITERAL_LANG = 22 +ANONYMOUS = 3 +XMLLITERAL = 25 + +Logic_NS = "http://www.w3.org/2000/10/swap/log#" +NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging +forSomeSym = Logic_NS + "forSome" +forAllSym = Logic_NS + "forAll" + +RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" +RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +OWL_NS = "http://www.w3.org/2002/07/owl#" +DAML_sameAs_URI = OWL_NS + "sameAs" +parsesTo_URI = Logic_NS + "parsesTo" +RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/" + +List_NS = RDF_NS_URI # From 20030808 +_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#" + +N3_first = (SYMBOL, List_NS + "first") +N3_rest = (SYMBOL, List_NS + "rest") +N3_li = (SYMBOL, List_NS + "li") +N3_nil = (SYMBOL, List_NS + "nil") +N3_List = (SYMBOL, List_NS + "List") +N3_Empty = (SYMBOL, List_NS + "Empty") + + +runNamespaceValue = None + + +def runNamespace(): + """Returns a URI suitable as a namespace for run-local objects""" + # @@@ include hostname (privacy?) (hash it?) 
+ global runNamespaceValue + if runNamespaceValue is None: + runNamespaceValue = join(base(), _unique_id()) + "#" + return runNamespaceValue + + +nextu = 0 + +import re +import lark +import hashlib +from lark import ( + Lark, + Transformer, + Tree, +) +from lark.visitors import Visitor +from lark.reconstruct import Reconstructor + +from lark.lexer import ( + Token, +) + +# from pymantic.compat import ( +# binary_type, +# ) +# from pymantic.parsers.base import ( +# BaseParser, +# ) +# from pymantic.primitives import ( +# BlankNode, +# Literal, +# NamedNode, +# Triple, +# ) +# from pymantic.util import ( +# grouper, +# smart_urljoin, +# decode_literal, +# ) + +grammar = r"""ntriplesdoc: statement* +?statement: directive | triples "." | quotedtriples "." +directive: prefix_id | base | sparql_prefix | sparql_base +prefix_id: "@prefix" PNAME_NS IRIREF "." +base: BASE_DIRECTIVE IRIREF "." +sparql_base: /BASE/i IRIREF +sparql_prefix: /PREFIX/i PNAME_NS IRIREF +triples: subject predicate_object_list + | blank_node_property_list predicate_object_list? +insidequotation: qtsubject verb qtobject +quotedtriples: triples compoundanno +predicate_object_list: verb object_list (";" (verb object_list)?)* +?object_list: object ("," object)* +?verb: predicate | /a/ +?subject: iri | blank_node | collection | quotation +?predicate: iri +?object: iri | blank_node | collection | blank_node_property_list | literal | quotation +?literal: rdf_literal | numeric_literal | boolean_literal +?qtsubject: iri | blank_node | quotation +?qtobject: iri | blank_node | literal | quotation +quotation: "<<" insidequotation ">>" +COMPOUNDL: "{|" +COMPOUNDR: "|}" +compoundanno: COMPOUNDL predicate_object_list COMPOUNDR +blank_node_property_list: "[" predicate_object_list "]" +collection: "(" object* ")" +numeric_literal: INTEGER | DECIMAL | DOUBLE +rdf_literal: string (LANGTAG | "^^" iri)? 
+boolean_literal: /true|false/ +string: STRING_LITERAL_QUOTE + | STRING_LITERAL_SINGLE_QUOTE + | STRING_LITERAL_LONG_SINGLE_QUOTE + | STRING_LITERAL_LONG_QUOTE +iri: IRIREF | prefixed_name +prefixed_name: PNAME_LN | PNAME_NS +blank_node: BLANK_NODE_LABEL | ANON + +BASE_DIRECTIVE: "@base" +IRIREF: "<" (/[^\x00-\x20<>"{}|^`\\]/ | UCHAR)* ">" +PNAME_NS: PN_PREFIX? ":" +PNAME_LN: PNAME_NS PN_LOCAL +BLANK_NODE_LABEL: "_:" (PN_CHARS_U | /[0-9]/) ((PN_CHARS | ".")* PN_CHARS)? +LANGTAG: "@" /[a-zA-Z]+/ ("-" /[a-zA-Z0-9]+/)* +INTEGER: /[+-]?[0-9]+/ +DECIMAL: /[+-]?[0-9]*/ "." /[0-9]+/ +DOUBLE: /[+-]?/ (/[0-9]+/ "." /[0-9]*/ EXPONENT + | "." /[0-9]+/ EXPONENT | /[0-9]+/ EXPONENT) +EXPONENT: /[eE][+-]?[0-9]+/ +STRING_LITERAL_QUOTE: "\"" (/[^\x22\x5C\x0A\x0D]/ | ECHAR | UCHAR)* "\"" +STRING_LITERAL_SINGLE_QUOTE: "'" (/[^\x27\x5C\x0A\x0D]/ | ECHAR | UCHAR)* "'" +STRING_LITERAL_LONG_SINGLE_QUOTE: "'''" (/'|''/? (/[^'\\]/ | ECHAR | UCHAR))* "'''" +STRING_LITERAL_LONG_QUOTE: "\"\"\"" (/"|""/? (/[^"\\]/ | ECHAR | UCHAR))* "\"\"\"" +UCHAR: "\\u" HEX~4 | "\\U" HEX~8 +ECHAR: "\\" /[tbnrf"'\\]/ +WS: /[\x20\x09\x0D\x0A]/ +ANON: "[" WS* "]" +PN_CHARS_BASE: /[A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF]/ +PN_CHARS_U: PN_CHARS_BASE | "_" +PN_CHARS: PN_CHARS_U | /[\-0-9\u00B7\u0300-\u036F\u203F-\u2040]/ +PN_PREFIX: PN_CHARS_BASE ((PN_CHARS | ".")* PN_CHARS)? +PN_LOCAL: (PN_CHARS_U | ":" | /[0-9]/ | PLX) ((PN_CHARS | "." | ":" | PLX)* (PN_CHARS | ":" | PLX))? +PLX: PERCENT | PN_LOCAL_ESC +PERCENT: "%" HEX~2 +HEX: /[0-9A-Fa-f]/ +PN_LOCAL_ESC: "\\" /[_~\.\-!$&'()*+,;=\/?#@%]/ + +%ignore WS +COMMENT: "#" /[^\n]/* +%ignore COMMENT +""" + +# r"""ntriplesdoc: statement* +# ?statement: triples "." | triples "."? (EOL triples ".")* EOL? 
+# triples: subject predicate object +# ?verb: predicate | /a/ +# ?subject: IRIREF | BLANK_NODE_LABEL | quotation +# ?predicate: IRIREF +# ?object: IRIREF | BLANK_NODE_LABEL | literal | quotation +# ?literal: rdf_literal | numeric_literal | boolean_literal +# ANGLEBRACKETL: "<<" +# ANGLEBRACKETR: ">>" +# quotation: ANGLEBRACKETL triples ANGLEBRACKETR +# numeric_literal: INTEGER | DECIMAL | DOUBLE +# rdf_literal: string (LANGTAG | "^^" iri)? +# boolean_literal: /true|false/ +# string: STRING_LITERAL_QUOTE +# | STRING_LITERAL_SINGLE_QUOTE +# | STRING_LITERAL_LONG_SINGLE_QUOTE +# | STRING_LITERAL_LONG_QUOTE +# iri: IRIREF | prefixed_name +# prefixed_name: PNAME_LN | PNAME_NS + +# EOL: /[#xD#xA]+/ +# IRIREF: "<" (/[^\x00-\x20<>"{}|^`\\]/ | UCHAR)* ">" +# PNAME_NS: PN_PREFIX? ":" +# PNAME_LN: PNAME_NS PN_LOCAL +# BLANK_NODE_LABEL: "_:" (PN_CHARS_U | /[0-9]/) ((PN_CHARS | ".")* PN_CHARS)? +# LANGTAG: "@" /[a-zA-Z]+/ ("-" /[a-zA-Z0-9]+/)* +# INTEGER: /[+-]?[0-9]+/ +# DECIMAL: /[+-]?[0-9]*/ "." /[0-9]+/ +# DOUBLE: /[+-]?/ (/[0-9]+/ "." /[0-9]*/ EXPONENT +# | "." /[0-9]+/ EXPONENT | /[0-9]+/ EXPONENT) +# EXPONENT: /[eE][+-]?[0-9]+/ +# STRING_LITERAL_QUOTE: "\"" (/[^\x22\x5C\x0A\x0D]/ | ECHAR | UCHAR)* "\"" +# STRING_LITERAL_SINGLE_QUOTE: "'" (/[^\x27\x5C\x0A\x0D]/ | ECHAR | UCHAR)* "'" +# STRING_LITERAL_LONG_SINGLE_QUOTE: "'''" (/'|''/? (/[^'\\]/ | ECHAR | UCHAR))* "'''" +# STRING_LITERAL_LONG_QUOTE: "\"\"\"" (/"|""/? (/[^"\\]/ | ECHAR | UCHAR))* "\"\"\"" +# UCHAR: "\\u" HEX~4 | "\\U" HEX~8 +# ECHAR: "\\" /[tbnrf"'\\]/ +# WS: /[\x20\x09\x0D\x0A]/ +# ANON: "[" WS* "]" +# PN_CHARS_BASE: /[A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF]/ +# PN_CHARS_U: PN_CHARS_BASE | "_" +# PN_CHARS: PN_CHARS_U | /[\-0-9\u00B7\u0300-\u036F\u203F-\u2040]/ +# PN_PREFIX: PN_CHARS_BASE ((PN_CHARS | ".")* PN_CHARS)? 
+# PN_LOCAL: (PN_CHARS_U | ":" | /[0-9]/ | PLX) ((PN_CHARS | "." | ":" | PLX)* (PN_CHARS | ":" | PLX))? +# PLX: PERCENT | PN_LOCAL_ESC +# PERCENT: "%" HEX~2 +# HEX: /[0-9A-Fa-f]/ +# PN_LOCAL_ESC: "\\" /[_~\.\-!$&'()*+,;=\/?#@%]/ + +# %ignore WS +# COMMENT: "#" /[^\n]/* +# %ignore COMMENT +# """ + +ntriples_lark = Lark(grammar, start="ntriplesdoc", parser="lalr", maybe_placeholders=False) + +from lark import Visitor, v_args +quotation_list = [] +quotation_dict = dict() +vblist = [] +quotationreif = [] +prefix_list = [] +quotationannolist = [] +constructors = "" +quoted_or_not = False +both_quoted_and_asserted = False +assertedtriplelist = [] + +def myHash(text:str): + return str(hashlib.md5(text.encode('utf-8')).hexdigest()) + +class FindVariables(Visitor): + def __init__(self): + super().__init__() + self.variable_list = [] + + def quotation(self, var): + + qut = Reconstructor(ntriples_lark).reconstruct(var) + qut = qut.replace(";", "") + if not (qut in quotation_list): + quotation_list.append(qut) + + vr = Reconstructor(ntriples_lark).reconstruct(var) + vr = vr.replace(";","") + quotation_dict[qut] = str(myHash(qut)) + "RdfstarTriple" + qut_hash = ":" + str(myHash(qut)) + id = quotation_dict.get(vr) + for x in quotation_dict: + if x in vr: + vr = vr.replace(x, ":"+quotation_dict.get(x)) + vr = vr.replace("<<", "") + vr = vr.replace(">>", "") + output = vr.split(":") + output.pop(0) + oa1 = Reconstructor(ntriples_lark).reconstruct(var) + oa1 = oa1.replace(";","") + output.append(oa1) + if (not (output in quotationreif)): + quotationreif.append(output) + + def insidequotation(self, var): + appends1 = [] + for x in var.children: + x1 = Reconstructor(ntriples_lark).reconstruct(x) + x1 = x1.replace(";","") + appends1.append(x1) + + if not (appends1 in vblist): + vblist.append(appends1) + + def triples(self, var): + + appends1 = [] + tri = Reconstructor(ntriples_lark).reconstruct(var) + tri = tri.replace(";", "") + if not (tri in assertedtriplelist): + 
assertedtriplelist.append(tri) + for x in var.children: + if x.data == 'predicate_object_list': + xc = x.children + for y in xc: + x2 = Reconstructor(ntriples_lark).reconstruct(y) + x2 = x2.replace(";","") + appends1.append(x2) + else: + anyquotationin = False + x1 = Reconstructor(ntriples_lark).reconstruct(x) + x1 = x1.replace(";","") + appends1.append(x1) + + if not (appends1 in vblist): + vblist.append(appends1) + +def RDFstarParsings(rdfstarstring): + global quotationannolist, quotation_dict, vblist, quotationreif, prefix_list, constructors, assertedtriplelist, quoted_or_not, both_quoted_and_asserted + quotationannolist = [] + vblist = [] + quotationreif = [] + prefix_list = [] + constructors = "" + quoted_or_not = False + both_quoted_and_asserted = False + tree = ntriples_lark.parse(rdfstarstring) + at = FindVariables().visit(tree) + + for y in vblist: + for element_index in range(0, len(y)): + if (y[element_index][0] == "_") & (not (element_index == 0)): + y[element_index]=" "+y[element_index] + result = "".join(y) + result = result.replace(" ", "") + if result in assertedtriplelist: + test1 = "<<"+result+">>" + if test1 in quotation_list: + both_quoted_and_asserted = True + else: + both_quoted_and_asserted = False + quoted_or_not = False + else: + test2 = "<<"+result+">>" + if test2 in quotation_list: + both_quoted_and_asserted = False + quoted_or_not = True + else: + both_quoted_and_asserted = False + quoted_or_not = False + result = "<<"+result+">>" + if not (result in quotation_list): + for z in range(0,len(y)): + if "<<" in y[z]: + y[z] = "_:"+quotation_dict[y[z]] + myvalue = str(myHash(result)) + subject = y[0] + predicate = y[1] + object = y[2] + if both_quoted_and_asserted: + next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + if quoted_or_not: + next_rdf_object = "_:" + 
str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + constructors+=next_rdf_object + else: + value = quotation_dict[result] + for z in range(0,len(y)): + if "<<" in y[z]: + y[z] = "_:"+quotation_dict[y[z]] + subject = y[0] + predicate = y[1] + object = y[2] + if both_quoted_and_asserted: + next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + if quoted_or_not: + next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + constructors+=next_rdf_object + + for z in quotationannolist: + result1 = "".join(z) + result1 = "<<"+result1+">>" + if result1 in quotation_list: + both_quoted_and_asserted = True + else: + both_quoted_and_asserted = False + quoted_or_not = False + value = str(myHash(result1)) + subject = z[0] + predicate = z[1] + object = z[2] + if both_quoted_and_asserted: + next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + if quoted_or_not: + next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" 
;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + constructors+=next_rdf_object + + for x in range(0, len(prefix_list)): + prefix_list[x] = Reconstructor(ntriples_lark).reconstruct(prefix_list[x]) + constructors = prefix_list[x]+"\n"+constructors + + constructors = "PREFIX rdfstar: \n"+constructors + + constructors = "PREFIX rdf: \n"+constructors + + if not (("PREFIX : " in constructors) or ("PREFIX:" in constructors)): + constructors = "PREFIX : \n"+constructors + + if "PREFIX:" in constructors: + constructors = constructors.replace("PREFIX:", "PREFIX :") + + print("yes?", constructors) + constructors = bytes(constructors, 'utf-8') + return constructors + +def uniqueURI(): + """A unique URI""" + global nextu + nextu += 1 + return runNamespace() + "u_" + str(nextu) + +tracking = False +chatty_flag = 50 + +# from why import BecauseOfData, becauseSubexpression + +def BecauseOfData(*args, **kargs): + # print args, kargs + pass + + +def becauseSubexpression(*args, **kargs): + # print args, kargs + pass + + +N3_forSome_URI = forSomeSym +N3_forAll_URI = forAllSym + +# Magic resources we know about + +ADDED_HASH = "#" # Stop where we use this in case we want to remove it! +# This is the hash on namespace URIs + +RDF_type = (SYMBOL, RDF_type_URI) +DAML_sameAs = (SYMBOL, DAML_sameAs_URI) + +LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies" + +BOOLEAN_DATATYPE = _XSD_PFX + "boolean" +DECIMAL_DATATYPE = _XSD_PFX + "decimal" +DOUBLE_DATATYPE = _XSD_PFX + "double" +FLOAT_DATATYPE = _XSD_PFX + "float" +INTEGER_DATATYPE = _XSD_PFX + "integer" + +option_noregen = 0 # If set, do not regenerate genids on output + +# @@ I18n - the notname chars need extending for well known unicode non-text +# characters. 
The XML spec switched to assuming unknown things were name +# characters. +# _namechars = string.lowercase + string.uppercase + string.digits + '_-' +_notQNameChars = set("\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~") # else valid qname :-/ +_notKeywordsChars = _notQNameChars | {"."} +_notNameChars = _notQNameChars | {":"} # Assume anything else valid name :-/ +_rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + +hexChars = set("ABCDEFabcdef0123456789") +escapeChars = set("(_~.-!$&'()*+,;=/?#@%)") # valid for \ escapes in localnames +numberChars = set("0123456789-") +numberCharsPlus = numberChars | {"+", "."} + + +def unicodeExpand(m): + try: + return chr(int(m.group(1), 16)) + except: + raise Exception("Invalid unicode code point: " + m.group(1)) + + +unicodeEscape4 = re.compile(r"\\u([0-9a-fA-F]{4})") +unicodeEscape8 = re.compile(r"\\U([0-9a-fA-F]{8})") + + +N3CommentCharacter = "#" # For unix script # ! compatibility + +# Parse string to sink +# +# Regular expressions: +eol = re.compile(r"[ \t]*(#[^\n]*)?\r?\n") # end of line, poss. w/comment +eof = re.compile(r"[ \t]*(#[^\n]*)?$") # end of file, poss. 
w/comment +ws = re.compile(r"[ \t]*") # Whitespace not including NL +signed_integer = re.compile(r"[-+]?[0-9]+") # integer +integer_syntax = re.compile(r"[-+]?[0-9]+") +decimal_syntax = re.compile(r"[-+]?[0-9]*\.[0-9]+") +exponent_syntax = re.compile( + r"[-+]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)(?:e|E)[-+]?[0-9]+" +) +digitstring = re.compile(r"[0-9]+") # Unsigned integer +interesting = re.compile(r"""[\\\r\n\"\']""") +langcode = re.compile(r"[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*") + +quoted_triple_list = [] +class SinkParser: + def __init__( + self, + store: "RDFSink", + openFormula: Optional["Formula"] = None, + thisDoc: str = "", + baseURI: Optional[str] = None, + genPrefix: str = "", + why: Optional[Callable[[], None]] = None, + turtle: bool = False, + ): + """note: namespace names should *not* end in # ; + the # will get added during qname processing""" + + self._bindings = {} + if thisDoc != "": + assert ":" in thisDoc, "Document URI not absolute: <%s>" % thisDoc + self._bindings[""] = thisDoc + "#" # default + + self._store = store + if genPrefix: + # TODO FIXME: there is no function named setGenPrefix + store.setGenPrefix(genPrefix) # type: ignore[attr-defined] # pass it on + + self._thisDoc = thisDoc + self.lines = 0 # for error handling + self.startOfLine = 0 # For calculating character number + self._genPrefix = genPrefix + self.keywords = ["a", "this", "bind", "has", "is", "of", "true", "false"] + self.keywordsSet = 0 # Then only can others be considered qnames + self._anonymousNodes: Dict[str, Node] = {} + self._rdfstartripleNodes: Dict[str, Node] = {} + # Dict of anon nodes already declared ln: Term + self._variables: Dict[Identifier, Identifier] = {} + self._parentVariables: Dict[Identifier, Identifier] = {} + self._reason = why # Why the parser was asked to parse this + + self.turtle = turtle # raise exception when encountering N3 extensions + # Turtle allows single or double quotes around strings, whereas N3 + # only allows double quotes. 
+ self.string_delimiters = ('"', "'") if turtle else ('"',) + + self._reason2 = None # Why these triples + # was: diag.tracking + if tracking: + self._reason2 = BecauseOfData( + store.newSymbol(thisDoc), because=self._reason + ) + + self._baseURI: Optional[str] + if baseURI: + self._baseURI = baseURI + else: + if thisDoc: + self._baseURI = thisDoc + else: + self._baseURI = None + + assert not self._baseURI or ":" in self._baseURI + + if not self._genPrefix: + if self._thisDoc: + self._genPrefix = self._thisDoc + "#_g" + else: + self._genPrefix = uniqueURI() + + self._formula: Formula + if openFormula is None and not turtle: + if self._thisDoc: + # TODO FIXME: store.newFormula does not take any arguments + self._formula = store.newFormula(thisDoc + "#_formula") # type: ignore[call-arg] + else: + self._formula = store.newFormula() + else: + self._formula = openFormula # type: ignore[assignment] + + self._context = self._formula + self._parentContext: Optional[Formula] = None + + def here(self, i: int) -> str: + """String generated from position in file + This is for repeatability when referring people to bnodes in a document. + This has diagnostic uses less formally, as it should point one to which + bnode the arbitrary identifier actually is. It gives the + line and character number of the '[' charcacter or path character + which introduced the blank node. The first blank node is boringly + _L1C1. 
It used to be used only for tracking, but for tests in general + it makes the canonical ordering of bnodes repeatable.""" + + return "%s_L%iC%i" % (self._genPrefix, self.lines, i - self.startOfLine + 1) + + def formula(self): + return self._formula + + def loadStream(self, stream: Union[IO[str], IO[bytes]]) -> Optional["Formula"]: + return self.loadBuf(stream.read()) # Not ideal + + def loadBuf(self, buf: Union[str, bytes]): + """Parses a buffer and returns its top level formula""" + self.startDoc() + + self.feed(buf) + return self.endDoc() # self._formula + + def feed(self, octets: Union[str, bytes]): + """Feed an octet stream to the parser + if BadSyntax is raised, the string + passed in the exception object is the + remainder after any statements have been parsed. + So if there is more data to feed to the + parser, it should be straightforward to recover.""" + + if not isinstance(octets, str): + s = octets.decode("utf-8") + # NB already decoded, so \ufeff + if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode("utf-8"): + s = s[1:] + else: + s = octets + + i = 0 + while i >= 0: + j = self.skipSpace(s, i) + if j < 0: + return + + i = self.directiveOrStatement(s, j) + if i < 0: + # print("# next char: %s" % s) + self.BadSyntax(s, j, "expected directive or statement") + + def directiveOrStatement(self, argstr: str, h: int) -> int: + + i = self.skipSpace(argstr, h) + if i < 0: + return i # EOF + + if self.turtle: + j = self.sparqlDirective(argstr, i) + if j >= 0: + return j + + j = self.directive(argstr, i) + if j >= 0: + return self.checkDot(argstr, j) + + j = self.statement(argstr, i) + if j >= 0: + return self.checkDot(argstr, j) + + return j + + # @@I18N + # _namechars = string.lowercase + string.uppercase + string.digits + '_-' + + def tok(self, tok: str, argstr: str, i: int, colon: bool = False): + """Check for keyword. Space must have been stripped on entry and + we must not be at end of file. 
+ if colon, then keyword followed by colon is ok + (@prefix: is ok, rdf:type shortcut a must be followed by ws) + """ + + assert tok[0] not in _notNameChars # not for punctuation + if argstr[i] == "@": + i += 1 + else: + if tok not in self.keywords: + return -1 # No, this has neither keywords declaration nor "@" + + i_plus_len_tok = i + len(tok) + if ( + argstr[i:i_plus_len_tok] == tok + and (argstr[i_plus_len_tok] in _notKeywordsChars) + or (colon and argstr[i_plus_len_tok] == ":") + ): + return i_plus_len_tok + else: + return -1 + + def sparqlTok(self, tok: str, argstr: str, i: int) -> int: + """Check for SPARQL keyword. Space must have been stripped on entry + and we must not be at end of file. + Case insensitive and not preceded by @ + """ + + assert tok[0] not in _notNameChars # not for punctuation + + len_tok = len(tok) + if argstr[i : i + len_tok].lower() == tok.lower() and ( + argstr[i + len_tok] in _notQNameChars + ): + i += len_tok + return i + else: + return -1 + + def directive(self, argstr: str, i: int) -> int: + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + res: typing.List[Any] = [] + + j = self.tok("bind", argstr, i) # implied "#". Obsolete. 
+ if j > 0: + self.BadSyntax(argstr, i, "keyword bind is obsolete: use @prefix") + + j = self.tok("keywords", argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'keywords' when in Turtle mode.") + + i = self.commaSeparatedList(argstr, j, res, self.bareWord) + if i < 0: + self.BadSyntax( + argstr, i, "'@keywords' needs comma separated list of words" + ) + self.setKeywords(res[:]) + return i + + j = self.tok("forAll", argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'forAll' when in Turtle mode.") + + i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) + if i < 0: + self.BadSyntax(argstr, i, "Bad variable list after @forAll") + for x in res: + # self._context.declareUniversal(x) + if x not in self._variables or x in self._parentVariables: + self._variables[x] = self._context.newUniversal(x) + return i + + j = self.tok("forSome", argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'forSome' when in Turtle mode.") + + i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) + if i < 0: + self.BadSyntax(argstr, i, "Bad variable list after @forSome") + for x in res: + self._context.declareExistential(x) + return i + + j = self.tok("prefix", argstr, i, colon=True) # no implied "#" + if j >= 0: + t: typing.List[Any] = [] + i = self.qname(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, "expected qname after @prefix") + j = self.uri_ref2(argstr, i, t) + if j < 0: + self.BadSyntax(argstr, i, "expected after @prefix _qname_") + ns = self.uriOf(t[1]) + + if self._baseURI: + ns = join(self._baseURI, ns) + elif ":" not in ns: + self.BadSyntax( + argstr, + j, + f"With no base URI, cannot use relative URI in @prefix <{ns}>", + ) + assert ":" in ns # must be absolute + self._bindings[t[0][0]] = ns + self.bind(t[0][0], hexify(ns)) + return j + + j = self.tok("base", argstr, i) # Added 2007/7/7 + if j >= 0: + t = [] + i = self.uri_ref2(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, 
"expected after @base ") + ns = self.uriOf(t[0]) + + if self._baseURI: + ns = join(self._baseURI, ns) + else: + self.BadSyntax( + argstr, + j, + "With no previous base URI, cannot use " + + "relative URI in @base <" + + ns + + ">", + ) + assert ":" in ns # must be absolute + self._baseURI = ns + return i + + return -1 # Not a directive, could be something else. + + def sparqlDirective(self, argstr: str, i: int): + + """ + turtle and trig support BASE/PREFIX without @ and without + terminating . + """ + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + + j = self.sparqlTok("PREFIX", argstr, i) + if j >= 0: + t: typing.List[Any] = [] + i = self.qname(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, "expected qname after @prefix") + j = self.uri_ref2(argstr, i, t) + if j < 0: + self.BadSyntax(argstr, i, "expected after @prefix _qname_") + ns = self.uriOf(t[1]) + + if self._baseURI: + ns = join(self._baseURI, ns) + elif ":" not in ns: + self.BadSyntax( + argstr, + j, + "With no base URI, cannot use " + + "relative URI in @prefix <" + + ns + + ">", + ) + assert ":" in ns # must be absolute + self._bindings[t[0][0]] = ns + self.bind(t[0][0], hexify(ns)) + return j + + j = self.sparqlTok("BASE", argstr, i) + if j >= 0: + t = [] + i = self.uri_ref2(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, "expected after @base ") + ns = self.uriOf(t[0]) + + if self._baseURI: + ns = join(self._baseURI, ns) + else: + self.BadSyntax( + argstr, + j, + "With no previous base URI, cannot use " + + "relative URI in @base <" + + ns + + ">", + ) + assert ":" in ns # must be absolute + self._baseURI = ns + return i + + return -1 # Not a directive, could be something else. 
+ + def bind(self, qn: str, uri: bytes) -> None: + assert isinstance(uri, bytes), "Any unicode must be %x-encoded already" + if qn == "": + self._store.setDefaultNamespace(uri) + else: + self._store.bind(qn, uri) + + def setKeywords(self, k: Optional[typing.List[str]]): + """Takes a list of strings""" + if k is None: + self.keywordsSet = 0 + else: + self.keywords = k + self.keywordsSet = 1 + + def startDoc(self) -> None: + # was: self._store.startDoc() + self._store.startDoc(self._formula) + + def endDoc(self) -> Optional["Formula"]: + """Signal end of document and stop parsing. returns formula""" + self._store.endDoc(self._formula) # don't canonicalize yet + return self._formula + + def makeStatement(self, quadruple): + # $$$$$$$$$$$$$$$$$$$$$ + # print "# Parser output: ", `quadruple` + self._store.makeStatement(quadruple, why=self._reason2) + + def statement(self, argstr: str, i: int) -> int: + r: typing.List[Any] = [] + i = self.object(argstr, i, r) # Allow literal for subject - extends RDF + if i < 0: + return i + + j = self.property_list(argstr, i, r[0]) + + if j < 0: + self.BadSyntax(argstr, i, "expected propertylist") + return j + + def subject(self, argstr: str, i: int, res: typing.List[Any]) -> int: + return self.item(argstr, i, res) + + def verb(self, argstr: str, i: int, res: typing.List[Any]) -> int: + """has _prop_ + is _prop_ of + a + = + _prop_ + >- prop -> + <- prop -< + _operator_""" + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + + r: typing.List[Any] = [] + + j = self.tok("has", argstr, i) + if j >= 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'has' keyword in Turtle mode") + + i = self.prop(argstr, j, r) + if i < 0: + self.BadSyntax(argstr, j, "expected property after 'has'") + res.append(("->", r[0])) + return i + + j = self.tok("is", argstr, i) + if j >= 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'is' keyword in Turtle mode") + + i = self.prop(argstr, j, r) + if i < 0: + self.BadSyntax(argstr, j, 
"expected after 'is'") + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax( + argstr, i, "End of file found, expected property after 'is'" + ) + i = j + j = self.tok("of", argstr, i) + if j < 0: + self.BadSyntax(argstr, i, "expected 'of' after 'is' ") + res.append(("<-", r[0])) + return j + + j = self.tok("a", argstr, i) + if j >= 0: + res.append(("->", RDF_type)) + return j + + if argstr[i : i + 2] == "<=": + if self.turtle: + self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. ") + + res.append(("<-", self._store.newSymbol(Logic_NS + "implies"))) + return i + 2 + + if argstr[i] == "=": + if self.turtle: + self.BadSyntax(argstr, i, "Found '=' in Turtle mode") + if argstr[i + 1] == ">": + res.append(("->", self._store.newSymbol(Logic_NS + "implies"))) + return i + 2 + res.append(("->", DAML_sameAs)) + return i + 1 + + if argstr[i : i + 2] == ":=": + if self.turtle: + self.BadSyntax(argstr, i, "Found ':=' in Turtle mode") + + # patch file relates two formulae, uses this @@ really? + res.append(("->", Logic_NS + "becomes")) + return i + 2 + + j = self.prop(argstr, i, r) + if j >= 0: + res.append(("->", r[0])) + return j + + if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-": + self.BadSyntax(argstr, j, ">- ... -> syntax is obsolete.") + + return -1 + + def prop(self, argstr: str, i: int, res): + return self.item(argstr, i, res) + + def item(self, argstr: str, i, res): + return self.path(argstr, i, res) + + def blankNode(self, uri=None): + return self._store.newBlankNode(self._context, uri, why=self._reason2) + + def path(self, argstr: str, i: int, res): + """Parse the path production.""" + j = self.nodeOrLiteral(argstr, i, res) + if j < 0: + return j # nope + + while argstr[j] in {"!", "^"}: # no spaces, must follow exactly (?) 
+ ch = argstr[j] + subj = res.pop() + obj = self.blankNode(uri=self.here(j)) + j = self.node(argstr, j + 1, res) + if j < 0: + self.BadSyntax(argstr, j, "EOF found in middle of path syntax") + pred = res.pop() + if ch == "^": # Reverse traverse + self.makeStatement((self._context, pred, obj, subj)) + else: + self.makeStatement((self._context, pred, subj, obj)) + res.append(obj) + return j + + def anonymousNode(self, ln: str): + """Remember or generate a term for one of these _: anonymous nodes""" + if ("RdfstarTriple" in ln): + + term = self._rdfstartripleNodes.get(ln, None) + if term is not None: + return term + term = self._store.newRdfstarTriple(self._context, why=self._reason2, hashvalue = ln) + self._rdfstartripleNodes[ln] = term + return term + term = self._anonymousNodes.get(ln, None) + if term is not None: + return term + term = self._store.newBlankNode(self._context, why=self._reason2) + self._anonymousNodes[ln] = term + return term + + def node(self, argstr: str, i: int, res, subjectAlready=None): + """Parse the production. + Space is now skipped once at the beginning + instead of in multiple calls to self.skipSpace(). + """ + subj = subjectAlready + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + i = j + ch = argstr[i] # Quick 1-character checks first: + + if ch == "[": + bnodeID = self.here(i) + j = self.skipSpace(argstr, i + 1) + if j < 0: + self.BadSyntax(argstr, i, "EOF after '['") + # Hack for "is" binding name to anon node + if argstr[j] == "=": + if self.turtle: + self.BadSyntax( + argstr, j, "Found '[=' or '[ =' when in turtle mode." 
+ ) + i = j + 1 + objs: typing.List[Any] = [] + j = self.objectList(argstr, i, objs) + if j >= 0: + subj = objs[0] + if len(objs) > 1: + for obj in objs: + self.makeStatement((self._context, DAML_sameAs, subj, obj)) + j = self.skipSpace(argstr, j) + if j < 0: + self.BadSyntax( + argstr, i, "EOF when objectList expected after [ = " + ) + if argstr[j] == ";": + j += 1 + else: + self.BadSyntax(argstr, i, "objectList expected after [= ") + + if subj is None: + subj = self.blankNode(uri=bnodeID) + + i = self.property_list(argstr, j, subj) + if i < 0: + self.BadSyntax(argstr, j, "property_list expected") + + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax( + argstr, i, "EOF when ']' expected after [ " + ) + if argstr[j] != "]": + + self.BadSyntax(argstr, j, "']' expected") + res.append(subj) + return j + 1 + + if not self.turtle and ch == "{": + # if self.turtle: + # self.BadSyntax(argstr, i, + # "found '{' while in Turtle mode, Formulas not supported!") + ch2 = argstr[i + 1] + if ch2 == "$": + # a set + i += 1 + j = i + 1 + List = [] + first_run = True + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "needed '$}', found end.") + if argstr[i : i + 2] == "$}": + j = i + 2 + break + + if not first_run: + if argstr[i] == ",": + i += 1 + else: + self.BadSyntax(argstr, i, "expected: ','") + else: + first_run = False + + item: typing.List[Any] = [] + j = self.item(argstr, i, item) # @@@@@ should be path, was object + if j < 0: + self.BadSyntax(argstr, i, "expected item in set or '$}'") + List.append(self._store.intern(item[0])) + res.append(self._store.newSet(List, self._context)) + return j + else: + # parse a formula + j = i + 1 + oldParentContext = self._parentContext + self._parentContext = self._context + parentAnonymousNodes = self._anonymousNodes + grandParentVariables = self._parentVariables + self._parentVariables = self._variables + self._anonymousNodes = {} + self._variables = self._variables.copy() + reason2 = 
self._reason2 + self._reason2 = becauseSubexpression + if subj is None: + subj = self._store.newFormula() + self._context = subj + + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "needed '}', found end.") + + if argstr[i] == "}": + j = i + 1 + break + + j = self.directiveOrStatement(argstr, i) + if j < 0: + self.BadSyntax(argstr, i, "expected statement or '}'") + + self._anonymousNodes = parentAnonymousNodes + self._variables = self._parentVariables + self._parentVariables = grandParentVariables + self._context = self._parentContext + self._reason2 = reason2 + self._parentContext = oldParentContext + res.append(subj.close()) # No use until closed + return j + + if ch == "(": + thing_type = self._store.newList + ch2 = argstr[i + 1] + if ch2 == "$": + thing_type = self._store.newSet + i += 1 + j = i + 1 + + List = [] + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "needed ')', found end.") + if argstr[i] == ")": + j = i + 1 + break + + item = [] + j = self.item(argstr, i, item) # @@@@@ should be path, was object + if j < 0: + self.BadSyntax(argstr, i, "expected item in list or ')'") + List.append(self._store.intern(item[0])) + res.append(thing_type(List, self._context)) + return j + + j = self.tok("this", argstr, i) # This context + if j >= 0: + self.BadSyntax( + argstr, + i, + "Keyword 'this' was ancient N3. Now use " + + "@forSome and @forAll keywords.", + ) + + # booleans + j = self.tok("true", argstr, i) + if j >= 0: + res.append(True) + return j + j = self.tok("false", argstr, i) + if j >= 0: + res.append(False) + return j + + if subj is None: # If this can be a named node, then check for a name. 
+ j = self.uri_ref2(argstr, i, res) + if j >= 0: + return j + + return -1 + + def addingquotedRdfstarTriple(self, quoted_triple_list, dira): + if quoted_triple_list[0] == rdflib.term.URIRef('https://w3id.org/rdf-star/AssertedStatement'): + if quoted_triple_list[1] == rdflib.term.URIRef('https://w3id.org/rdf-star/QuotedStatement'): + if dira == "->": + self.makeStatement((self._context, quoted_triple_list[4], quoted_triple_list[3], quoted_triple_list[5])) + quoted_triple_list[2].setSubject(quoted_triple_list[3]) + quoted_triple_list[2].setPredicate(quoted_triple_list[4]) + quoted_triple_list[2].setObject(quoted_triple_list[5]) + + else: + self.makeStatement((self._context, quoted_triple_list[4], quoted_triple_list[5], quoted_triple_list[3])) + + quoted_triple_list[2].setSubject(quoted_triple_list[4]) + quoted_triple_list[2].setPredicate(quoted_triple_list[5]) + quoted_triple_list[2].setObject(quoted_triple_list[6]) + + else: + if dira == "->": + self.makeStatement((self._context, quoted_triple_list[2], quoted_triple_list[1], quoted_triple_list[3])) + else: + self.makeStatement((self._context, quoted_triple_list[2], quoted_triple_list[3], quoted_triple_list[1])) + else: + if dira == "->": + quoted_triple_list[1].setSubject(quoted_triple_list[2]) + quoted_triple_list[1].setPredicate(quoted_triple_list[3]) + quoted_triple_list[1].setObject(quoted_triple_list[4]) + + else: + quoted_triple_list[1].setSubject(quoted_triple_list[2]) + quoted_triple_list[1].setPredicate(quoted_triple_list[3]) + quoted_triple_list[1].setObject(quoted_triple_list[4]) + + def property_list(self, argstr: str, i: int, subj): + """Parse property list + Leaves the terminating punctuation in the buffer + """ + global quoted_triple_list + while 1: + while 1: # skip repeat ; + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax( + argstr, i, "EOF found when expected verb in property list" + ) + if argstr[j] != ";": + break + i = j + 1 + + if argstr[j : j + 2] == ":-": + if self.turtle: + 
self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") + i = j + 2 + res: typing.List[Any] = [] + j = self.node(argstr, i, res, subj) + if j < 0: + self.BadSyntax(argstr, i, "bad {} or () or [] node after :- ") + i = j + continue + i = j + v: typing.List[Any] = [] + j = self.verb(argstr, i, v) + if j <= 0: + return i # void but valid + + objs: typing.List[Any] = [] + i = self.objectList(argstr, j, objs) + if i < 0: + self.BadSyntax(argstr, j, "objectList expected") + + for obj in objs: + dira, sym = v[0] + if "RdfstarTriple" in subj: + if "rdf-star" in str(obj): + if len(quoted_triple_list) > 2: + quoted_triple_list = [] + quoted_triple_list.append(obj) + if (rdflib.term.URIRef('https://w3id.org/rdf-star/QuotedStatement') in quoted_triple_list) & (not (subj in quoted_triple_list)): + quoted_triple_list.append(subj) + if "#object" in sym: + self.addingquotedRdfstarTriple(quoted_triple_list, dira) + else: + if dira == "->": + self.makeStatement((self._context, sym, subj, obj)) + else: + self.makeStatement((self._context, sym, obj, subj)) + + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, j, "EOF found in list of objects") + if argstr[i] != ";": + return i + i += 1 # skip semicolon and continue + + def commaSeparatedList(self, argstr: str, j, res, what): + """return value: -1 bad syntax; >1 new position in argstr + res has things found appended + """ + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "EOF found expecting comma sep list") + if argstr[i] == ".": + return j # empty list is OK + i = what(argstr, i, res) + if i < 0: + return -1 + + while 1: + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + ch = argstr[j] + if ch != ",": + if ch != ".": + return -1 + return j # Found but not swallowed "." 
+ i = what(argstr, j + 1, res) + if i < 0: + self.BadSyntax(argstr, i, "bad list content") + + def objectList(self, argstr: str, i: int, res: typing.List[Any]) -> int: + i = self.object(argstr, i, res) + if i < 0: + return -1 + while 1: + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, j, "EOF found after object") + if argstr[j] != ",": + return j # Found something else! + i = self.object(argstr, j + 1, res) + if i < 0: + return i + + def checkDot(self, argstr: str, i: int): + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + ch = argstr[j] + if ch == ".": + return j + 1 # skip + if ch == "}": + return j # don't skip it + if ch == "]": + return j + self.BadSyntax(argstr, j, "expected '.' or '}' or ']' at end of statement") + + def uri_ref2(self, argstr: str, i: int, res): + """Generate uri from n3 representation. + Note that the RDF convention of directly concatenating + NS and local name is now used though I prefer inserting a '#' + to make the namesapces look more like what XML folks expect. + """ + qn: typing.List[Any] = [] + j = self.qname(argstr, i, qn) + if j >= 0: + pfx, ln = qn[0] + if pfx is None: + assert 0, "not used?" + ns = self._baseURI + ADDED_HASH # type: ignore[unreachable] + else: + try: + ns = self._bindings[pfx] + except KeyError: + if pfx == "_": # Magic prefix 2001/05/30, can be changed + res.append(self.anonymousNode(ln)) + return j + if not self.turtle and pfx == "": + ns = join(self._baseURI or "", "#") + else: + self.BadSyntax(argstr, i, 'Prefix "%s:" not bound' % (pfx)) + symb = self._store.newSymbol(ns + ln) + res.append(self._variables.get(symb, symb)) + return j + + i = self.skipSpace(argstr, i) + if i < 0: + return -1 + + if argstr[i] == "?": + v: typing.List[Any] = [] + j = self.variable(argstr, i, v) + if j > 0: # Forget variables as a class, only in context. 
+ res.append(v[0]) + return j + return -1 + + elif argstr[i] == "<": + st = i + 1 + i = argstr.find(">", st) + if i >= 0: + uref = argstr[st:i] # the join should dealt with "": + + # expand unicode escapes + uref = unicodeEscape8.sub(unicodeExpand, uref) + uref = unicodeEscape4.sub(unicodeExpand, uref) + + if self._baseURI: + uref = join(self._baseURI, uref) # was: uripath.join + else: + assert ( + ":" in uref + ), "With no base URI, cannot deal with relative URIs" + if argstr[i - 1] == "#" and not uref[-1:] == "#": + uref += "#" # She meant it! Weirdness in urlparse? + symb = self._store.newSymbol(uref) + res.append(self._variables.get(symb, symb)) + return i + 1 + self.BadSyntax(argstr, j, "unterminated URI reference") + + elif self.keywordsSet: + v = [] + j = self.bareWord(argstr, i, v) + if j < 0: + return -1 # Forget variables as a class, only in context. + if v[0] in self.keywords: + self.BadSyntax(argstr, i, 'Keyword "%s" not allowed here.' % v[0]) + res.append(self._store.newSymbol(self._bindings[""] + v[0])) + return j + else: + return -1 + + def skipSpace(self, argstr: str, i: int): + """Skip white space, newlines and comments. + return -1 if EOF, else position of first non-ws character""" + + # Most common case is a non-commented line starting with few spaces and tabs. 
+ try: + while True: + ch = argstr[i] + if ch in {" ", "\t"}: + i += 1 + continue + elif ch not in {"#", "\r", "\n"}: + return i + break + except IndexError: + return -1 + + while 1: + m = eol.match(argstr, i) + if m is None: + break + self.lines += 1 + self.startOfLine = i = m.end() # Point to first character unmatched + m = ws.match(argstr, i) + if m is not None: + i = m.end() + m = eof.match(argstr, i) + return i if m is None else -1 + + def variable(self, argstr: str, i: int, res): + """?abc -> variable(:abc)""" + + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + + if argstr[j] != "?": + return -1 + j += 1 + i = j + if argstr[j] in numberChars: + self.BadSyntax(argstr, j, "Variable name can't start with '%s'" % argstr[j]) + len_argstr = len(argstr) + while i < len_argstr and argstr[i] not in _notKeywordsChars: + i += 1 + if self._parentContext is None: + varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) # type: ignore[operator] + if varURI not in self._variables: + self._variables[varURI] = self._context.newUniversal( + varURI, why=self._reason2 + ) + res.append(self._variables[varURI]) + return i + # @@ was: + # self.BadSyntax(argstr, j, + # "Can't use ?xxx syntax for variable in outermost level: %s" + # % argstr[j-1:i]) + varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) # type: ignore[operator] + if varURI not in self._parentVariables: + self._parentVariables[varURI] = self._parentContext.newUniversal( + varURI, why=self._reason2 + ) + res.append(self._parentVariables[varURI]) + return i + + def bareWord(self, argstr: str, i: int, res): + """abc -> :abc""" + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + + if argstr[j] in numberChars or argstr[j] in _notKeywordsChars: + return -1 + i = j + len_argstr = len(argstr) + while i < len_argstr and argstr[i] not in _notKeywordsChars: + i += 1 + res.append(argstr[j:i]) + return i + + def qname(self, argstr: str, i: int, res): + """ + xyz:def -> ('xyz', 'def') + If not 
in keywords and keywordsSet: def -> ('', 'def') + :def -> ('', 'def') + """ + + i = self.skipSpace(argstr, i) + if i < 0: + return -1 + + c = argstr[i] + if c in numberCharsPlus: + return -1 + len_argstr = len(argstr) + if c not in _notNameChars: + j = i + i += 1 + + try: + while argstr[i] not in _notNameChars: + i += 1 + except IndexError: + pass # Very rare. + + if argstr[i - 1] == ".": # qname cannot end with "." + i -= 1 + if i == j: + return -1 + ln = argstr[j:i] + + else: # First character is non-alpha + ln = "" # Was: None - TBL (why? useful?) + + if i < len_argstr and argstr[i] == ":": + pfx = ln + # bnodes names have different rules + if pfx == "_": + allowedChars = _notNameChars + else: + allowedChars = _notQNameChars + + i += 1 + lastslash = False + start = i + ln = "" + while i < len_argstr: + c = argstr[i] + if c == "\\" and not lastslash: # Very rare. + lastslash = True + if start < i: + ln += argstr[start:i] + start = i + 1 + elif c not in allowedChars or lastslash: # Most common case is "a-zA-Z" + if lastslash: + if c not in escapeChars: + raise BadSyntax( + self._thisDoc, + self.lines, + argstr, + i, + "illegal escape " + c, + ) + elif c == "%": # Very rare. + if ( + argstr[i + 1] not in hexChars + or argstr[i + 2] not in hexChars + ): + raise BadSyntax( + self._thisDoc, + self.lines, + argstr, + i, + "illegal hex escape " + c, + ) + lastslash = False + else: + break + i += 1 + + if lastslash: + raise BadSyntax( + self._thisDoc, self.lines, argstr, i, "qname cannot end with \\" + ) + + if argstr[i - 1] == ".": + # localname cannot end in . 
+ if len(ln) == 0 and start == i: + return -1 + i -= 1 + + if start < i: + ln += argstr[start:i] + + res.append((pfx, ln)) + return i + + else: # delimiter was not ":" + if ln and self.keywordsSet and ln not in self.keywords: + res.append(("", ln)) + return i + return -1 + + def object(self, argstr: str, i: int, res): + j = self.subject(argstr, i, res) + if j >= 0: + return j + else: + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + else: + i = j + + ch = argstr[i] + if ch in self.string_delimiters: + ch_three = ch * 3 + if argstr[i : i + 3] == ch_three: + delim = ch_three + i += 3 + else: + delim = ch + i += 1 + + j, s = self.strconst(argstr, i, delim) + + res.append(self._store.newLiteral(s)) # type: ignore[call-arg] # TODO FIXME + return j + else: + return -1 + + def nodeOrLiteral(self, argstr: str, i: int, res): + j = self.node(argstr, i, res) + startline = self.lines # Remember where for error messages + if j >= 0: + return j + else: + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + else: + i = j + + ch = argstr[i] + if ch in numberCharsPlus: + m = exponent_syntax.match(argstr, i) + if m: + j = m.end() + res.append(float(argstr[i:j])) + return j + + m = decimal_syntax.match(argstr, i) + if m: + j = m.end() + res.append(Decimal(argstr[i:j])) + return j + + m = integer_syntax.match(argstr, i) + if m: + j = m.end() + res.append(long_type(argstr[i:j])) + return j + + # return -1 ## or fall through? + + ch_three = ch * 3 + if ch in self.string_delimiters: + if argstr[i : i + 3] == ch_three: + delim = ch_three + i += 3 + else: + delim = ch + i += 1 + + dt = None + j, s = self.strconst(argstr, i, delim) + lang = None + if argstr[j] == "@": # Language? 
+ m = langcode.match(argstr, j + 1) + if m is None: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "Bad language code syntax on string " + "literal, after @", + ) + i = m.end() + lang = argstr[j + 1 : i] + j = i + if argstr[j : j + 2] == "^^": + res2: typing.List[Any] = [] + j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI + dt = res2[0] + res.append(self._store.newLiteral(s, dt, lang)) + return j + else: + return -1 + + def uriOf(self, sym): + if isinstance(sym, tuple): + return sym[1] # old system for --pipe + # return sym.uriref() # cwm api + return sym + + def strconst(self, argstr: str, i: int, delim): + """parse an N3 string constant delimited by delim. + return index, val + """ + delim1 = delim[0] + delim2, delim3, delim4, delim5 = delim1 * 2, delim1 * 3, delim1 * 4, delim1 * 5 + + j = i + ustr = "" # Empty unicode string + startline = self.lines # Remember where for error messages + len_argstr = len(argstr) + while j < len_argstr: + if argstr[j] == delim1: + if delim == delim1: # done when delim is " or ' + i = j + 1 + return i, ustr + if ( + delim == delim3 + ): # done when delim is """ or ''' and, respectively ... + if argstr[j : j + 5] == delim5: # ... we have "" or '' before + i = j + 5 + ustr += delim2 + return i, ustr + if argstr[j : j + 4] == delim4: # ... we have " or ' before + i = j + 4 + ustr += delim1 + return i, ustr + if argstr[j : j + 3] == delim3: # current " or ' is part of delim + i = j + 3 + return i, ustr + + # we are inside of the string and current char is " or ' + j += 1 + ustr += delim1 + continue + + m = interesting.search(argstr, j) # was argstr[j:]. + # Note for pos param to work, MUST be compiled ... re bug? 
+ assert m, "Quote expected in string at ^ in %s^%s" % ( + argstr[j - 20 : j], + argstr[j : j + 20], + ) # at least need a quote + + i = m.start() + try: + ustr += argstr[j:i] + except UnicodeError: + err = "" + for c in argstr[j:i]: + err = err + (" %02x" % ord(c)) + streason = sys.exc_info()[1].__str__() + raise BadSyntax( + self._thisDoc, + startline, + argstr, + j, + "Unicode error appending characters" + + " %s to string, because\n\t%s" % (err, streason), + ) + + # print "@@@ i = ",i, " j=",j, "m.end=", m.end() + + ch = argstr[i] + if ch == delim1: + j = i + continue + elif ch in {'"', "'"} and ch != delim1: + ustr += ch + j = i + 1 + continue + elif ch in {"\r", "\n"}: + if delim == delim1: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "newline found in string literal", + ) + self.lines += 1 + ustr += ch + j = i + 1 + self.startOfLine = j + + elif ch == "\\": + j = i + 1 + ch = argstr[j] # Will be empty if string ends + if not ch: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "unterminated string literal (2)", + ) + k = "abfrtvn\\\"'".find(ch) + if k >= 0: + uch = "\a\b\f\r\t\v\n\\\"'"[k] + ustr += uch + j += 1 + elif ch == "u": + j, ch = self.uEscape(argstr, j + 1, startline) + ustr += ch + elif ch == "U": + j, ch = self.UEscape(argstr, j + 1, startline) + ustr += ch + else: + self.BadSyntax(argstr, i, "bad escape") + + self.BadSyntax(argstr, i, "unterminated string literal") + + def _unicodeEscape(self, argstr: str, i, startline, reg, n, prefix): + if len(argstr) < i + n: + raise BadSyntax( + self._thisDoc, startline, argstr, i, "unterminated string literal(3)" + ) + try: + return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n]) + except: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "bad string literal hex escape: " + argstr[i : i + n], + ) + + def uEscape(self, argstr: str, i, startline): + return self._unicodeEscape(argstr, i, startline, unicodeEscape4, 4, "u") + + def 
UEscape(self, argstr: str, i, startline): + return self._unicodeEscape(argstr, i, startline, unicodeEscape8, 8, "U") + + def BadSyntax(self, argstr: str, i, msg): + raise BadSyntax(self._thisDoc, self.lines, argstr, i, msg) + + +# If we are going to do operators then they should generate +# [ is operator:plus of ( \1 \2 ) ] + + +class BadSyntax(SyntaxError): + def __init__(self, uri, lines, argstr, i, why): + self._str = argstr.encode("utf-8") # Better go back to strings for errors + self._i = i + self._why = why + self.lines = lines + self._uri = uri + + def __str__(self): + argstr = self._str + i = self._i + st = 0 + if i > 60: + pre = "..." + st = i - 60 + else: + pre = "" + if len(argstr) - i > 60: + post = "..." + else: + post = "" + + return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' % ( + self.lines + 1, + self._uri, + self._why, + pre, + argstr[st:i], + argstr[i : i + 60], + post, + ) + + @property + def message(self): + return str(self) + + +############################################################################### +class Formula(object): + number = 0 + + def __init__(self, parent): + self.uuid = uuid4().hex + self.counter = 0 + Formula.number += 1 + self.number = Formula.number + self.existentials = {} + self.universals = {} + + self.quotedgraph = QuotedGraph(store=parent.store, identifier=self.id()) + + def __str__(self): + return "_:Formula%s" % self.number + + def id(self): + return BNode("_:Formula%s" % self.number) + + def newBlankNode(self, uri=None, why=None): + if uri is None: + self.counter += 1 + bn = BNode("f%sb%s" % (self.uuid, self.counter)) + else: + bn = BNode(uri.split("#").pop().replace("_", "b")) + return bn + + def newRdfstarTriple(self, hashvalue, uri=None, why=None): + if uri is None: + # self.counter += 1 + rdfstartriple = RdfstarTriple(hashvalue = hashvalue) + else: + rdfstartriple = RdfstarTriple(hashvalue = hashvalue) + return rdfstartriple + + def newUniversal(self, uri, why=None): + return 
Variable(uri.split("#").pop()) + + def declareExistential(self, x): + self.existentials[x] = self.newBlankNode() + + def close(self): + + return self.quotedgraph + + +r_hibyte = re.compile(r"([\x80-\xff])") + + +class RDFSink(object): + def __init__(self, graph: Graph): + self.rootFormula: Optional[Formula] = None + self.uuid = uuid4().hex + self.counter = 0 + self.graph = graph + + def newFormula(self) -> Formula: + fa = getattr(self.graph.store, "formula_aware", False) + if not fa: + raise ParserError( + "Cannot create formula parser with non-formula-aware store." + ) + f = Formula(self.graph) + return f + + def newGraph(self, identifier: Identifier) -> Graph: + return Graph(self.graph.store, identifier) + + def newSymbol(self, *args: str): + return URIRef(args[0]) + + def newBlankNode( + self, + arg: Optional[Union[Formula, Graph, Any]] = None, + uri: Optional[str] = None, + why: Optional[Callable[[], None]] = None, + ) -> BNode: + if isinstance(arg, Formula): + return arg.newBlankNode(uri) + elif isinstance(arg, Graph) or arg is None: + self.counter += 1 + bn = BNode("n%sb%s" % (self.uuid, self.counter)) + else: + bn = BNode(str(arg[0]).split("#").pop().replace("_", "b")) + return bn + + def newRdfstarTriple( + self, + # hashvalue: Optional[str], + # arg: Optional[Union[Formula, Graph, Any]] = None, + # uri: Optional[str] = None, + arg: Optional[Union[Formula, Graph, Any]] = None, + uri: Optional[str] = None, + why: Optional[Callable[[], None]] = None, + hashvalue: Optional[str] = None + ) -> RdfstarTriple: + + if isinstance(arg, Formula): + + return arg.newRdfstarTriple(hashvalue = hashvalue) + elif isinstance(arg, Graph) or arg is None: + + rdfstartriple = RdfstarTriple(hashvalue =hashvalue) + else: + + + rdfstartriple = RdfstarTriple(hashvalue =hashvalue) + return rdfstartriple + + def newLiteral(self, s: str, dt: Optional[URIRef], lang: Optional[str]) -> Literal: + if dt: + return Literal(s, datatype=dt) + else: + return Literal(s, lang=lang) + + def 
newList(self, n: typing.List[Any], f: Optional[Formula]): + nil = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") + if not n: + return nil + + first = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#first") + rest = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest") + af = a = self.newBlankNode(f) + + for ne in n[:-1]: + self.makeStatement((f, first, a, ne)) + an = self.newBlankNode(f) + self.makeStatement((f, rest, a, an)) + a = an + self.makeStatement((f, first, a, n[-1])) + self.makeStatement((f, rest, a, nil)) + return af + + def newSet(self, *args): + return set(args) + + def setDefaultNamespace(self, *args) -> str: + return ":".join(repr(n) for n in args) + + def makeStatement(self, quadruple, why=None) -> None: + f, p, s, o = quadruple + + if hasattr(p, "formula"): + raise ParserError("Formula used as predicate") + + s = self.normalise(f, s) + p = self.normalise(f, p) + o = self.normalise(f, o) + + if f == self.rootFormula: + # print s, p, o, '.' 
+ self.graph.add((s, p, o)) + elif isinstance(f, Formula): + f.quotedgraph.add((s, p, o)) + else: + f.add((s, p, o)) + + # return str(quadruple) + + def normalise(self, f: Optional[Formula], n): + if isinstance(n, tuple): + return URIRef(str(n[1])) + + if isinstance(n, bool): + s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE) + return s + + if isinstance(n, int) or isinstance(n, long_type): + s = Literal(str(n), datatype=INTEGER_DATATYPE) + return s + + if isinstance(n, Decimal): + value = str(n) + if value == "-0": + value = "0" + s = Literal(value, datatype=DECIMAL_DATATYPE) + return s + + if isinstance(n, float): + s = Literal(str(n), datatype=DOUBLE_DATATYPE) + return s + + if isinstance(f, Formula): + if n in f.existentials: + return f.existentials[n] + + # if isinstance(n, Var): + # if f.universals.has_key(n): + # return f.universals[n] + # f.universals[n] = f.newBlankNode() + # return f.universals[n] + + return n + + def intern(self, something: AnyT) -> AnyT: + return something + + def bind(self, pfx, uri): + pass # print pfx, ':', uri + + def startDoc(self, formula: Optional[Formula]): + self.rootFormula = formula + + def endDoc(self, formula: Optional[Formula]) -> None: + pass + + +################################################### +# +# Utilities +# + + +def hexify(ustr): + """Use URL encoding to return an ASCII string + corresponding to the given UTF8 string + >>> hexify("http://example/a b") + b'http://example/a%20b' + """ + # s1=ustr.encode('utf-8') + s = "" + for ch in ustr: # .encode('utf-8'): + if ord(ch) > 126 or ord(ch) < 33: + ch = "%%%02X" % ord(ch) + else: + ch = "%c" % ord(ch) + s = s + ch + return s.encode("latin-1") + + +class NtriplesStarParser(Parser): + + """ + An RDFLib parser for Turtle + See http://www.w3.org/TR/turtle/ + """ + + def __init__(self): + pass + + def parse( + self, + source: "InputSource", + graph: Graph, + encoding: Optional[str] = "utf-8", + turtle: bool = True, + ): + if encoding not in [None, "utf-8"]: + raise 
ParserError( + "N3/Turtle files are always utf-8 encoded, I was passed: %s" % encoding + ) + + sink = RDFSink(graph) + + baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "") + p = SinkParser(sink, baseURI=baseURI, turtle=turtle) + # N3 parser prefers str stream + # stream = source.getCharacterStream() + # if not stream: + # stream = source.getByteStream() + # p.loadStream(stream) + + if hasattr(source, "file"): + f = open(source.file.name, "rb") + rdbytes = f.read() + f.close() + elif hasattr(source, "_InputSource__bytefile"): + if hasattr(source._InputSource__bytefile, "wrapped"): + f = open((source._InputSource__bytefile.wrapped.strip().splitlines())[0], "rb") # what if multiple files + rdbytes = f.read() + f.close() + + bp = rdbytes.decode("utf-8") + ou = RDFstarParsings(bp) + p.feed(ou) + p.endDoc() + for prefix, namespace in p._bindings.items(): + graph.bind(prefix, namespace) + + +class N3Parser(NtriplesStarParser): + + """ + An RDFLib parser for Notation3 + See http://www.w3.org/DesignIssues/Notation3.html + """ + + def __init__(self): + pass + + def parse(self, source, graph, encoding="utf-8"): + # we're currently being handed a Graph, not a ConjunctiveGraph + # context-aware is this implied by formula_aware + ca = getattr(graph.store, "context_aware", False) + fa = getattr(graph.store, "formula_aware", False) + if not ca: + raise ParserError("Cannot parse N3 into non-context-aware store.") + elif not fa: + raise ParserError("Cannot parse N3 into non-formula-aware store.") + + conj_graph = ConjunctiveGraph(store=graph.store) + conj_graph.default_context = graph # TODO: CG __init__ should have a + # default_context arg + # TODO: update N3Processor so that it can use conj_graph as the sink + conj_graph.namespace_manager = graph.namespace_manager + + NtriplesStarParser.parse(self, source, conj_graph, encoding, turtle=False) diff --git a/rdflib/plugins/parsers/trigstar.py b/rdflib/plugins/parsers/trigstar.py new file mode 100644 index 
000000000..6e9c1016d --- /dev/null +++ b/rdflib/plugins/parsers/trigstar.py @@ -0,0 +1,2438 @@ +#!/usr/bin/env python +""" +notation3.py - Standalone Notation3 Parser +Derived from CWM, the Closed World Machine +Authors of the original suite: +* Dan Connolly <@@> +* Tim Berners-Lee <@@> +* Yosi Scharf <@@> +* Joseph M. Reagle Jr. +* Rich Salz +http://www.w3.org/2000/10/swap/notation3.py +Copyright 2000-2007, World Wide Web Consortium. +Copyright 2001, MIT. +Copyright 2001, Zolera Systems Inc. +License: W3C Software License +http://www.w3.org/Consortium/Legal/copyright-software +Modified by Sean B. Palmer +Copyright 2007, Sean B. Palmer. +Modified to work with rdflib by Gunnar Aastrand Grimnes +Copyright 2010, Gunnar A. Grimnes +""" +import codecs +import os +import re +from smtplib import quotedata +import sys +import rdflib + +# importing typing for `typing.List` because `List`` is used for something else +import typing +from decimal import Decimal +from typing import IO, TYPE_CHECKING, Any, Callable, Dict, Optional, TypeVar, Union +from uuid import uuid4 + +from rdflib.compat import long_type +from rdflib.exceptions import ParserError +from rdflib.graph import ConjunctiveGraph, Graph, QuotedGraph +from rdflib.term import ( + _XSD_PFX, + RdfstarTriple, + BNode, + Identifier, + Literal, + Node, + URIRef, + Variable, + _unique_id, +) + +__all__ = [ + "BadSyntax", + "N3Parser", + "TurtleParser", + "splitFragP", + "join", + "base", + "runNamespace", + "uniqueURI", + "hexify", +] + +from rdflib.parser import Parser + +if TYPE_CHECKING: + from rdflib.parser import InputSource + +AnyT = TypeVar("AnyT") + + +def splitFragP(uriref, punct=0): + """split a URI reference before the fragment + Punctuation is kept. + e.g. 
+ >>> splitFragP("abc#def") + ('abc', '#def') + >>> splitFragP("abcdef") + ('abcdef', '') + """ + + i = uriref.rfind("#") + if i >= 0: + return uriref[:i], uriref[i:] + else: + return uriref, "" + + +def join(here, there): + """join an absolute URI and URI reference + (non-ascii characters are supported/doctested; + haven't checked the details of the IRI spec though) + ``here`` is assumed to be absolute. + ``there`` is URI reference. + >>> join('http://example/x/y/z', '../abc') + 'http://example/x/abc' + Raise ValueError if there uses relative path + syntax but here has no hierarchical path. + >>> join('mid:foo@example', '../foo') # doctest: +NORMALIZE_WHITESPACE + Traceback (most recent call last): + raise ValueError(here) + ValueError: Base has no slash + after colon - with relative '../foo'. + >>> join('http://example/x/y/z', '') + 'http://example/x/y/z' + >>> join('mid:foo@example', '#foo') + 'mid:foo@example#foo' + We grok IRIs + >>> len(u'Andr\\xe9') + 5 + >>> join('http://example.org/', u'#Andr\\xe9') + u'http://example.org/#Andr\\xe9' + """ + + # assert(here.find("#") < 0), \ + # "Base may not contain hash: '%s'" % here # why must caller splitFrag? + + slashl = there.find("/") + colonl = there.find(":") + + # join(base, 'foo:/') -- absolute + if colonl >= 0 and (slashl < 0 or colonl < slashl): + return there + + bcolonl = here.find(":") + assert bcolonl >= 0, ( + "Base uri '%s' is not absolute" % here + ) # else it's not absolute + + path, frag = splitFragP(there) + if not path: + return here + frag + + # join('mid:foo@example', '../foo') bzzt + if here[bcolonl + 1 : bcolonl + 2] != "/": + raise ValueError( + "Base <%s> has no slash after " + "colon - with relative '%s'." 
% (here, there) + ) + + if here[bcolonl + 1 : bcolonl + 3] == "//": + bpath = here.find("/", bcolonl + 3) + else: + bpath = bcolonl + 1 + + # join('http://xyz', 'foo') + if bpath < 0: + bpath = len(here) + here = here + "/" + + # join('http://xyz/', '//abc') => 'http://abc' + if there[:2] == "//": + return here[: bcolonl + 1] + there + + # join('http://xyz/', '/abc') => 'http://xyz/abc' + if there[:1] == "/": + return here[:bpath] + there + + slashr = here.rfind("/") + + while 1: + if path[:2] == "./": + path = path[2:] + if path == ".": + path = "" + elif path[:3] == "../" or path == "..": + path = path[3:] + i = here.rfind("/", bpath, slashr) + if i >= 0: + here = here[: i + 1] + slashr = i + else: + break + + return here[: slashr + 1] + path + frag + + +def base(): + """The base URI for this process - the Web equiv of cwd + Relative or absolute unix-standard filenames parsed relative to + this yield the URI of the file. + If we had a reliable way of getting a computer name, + we should put it in the hostname just to prevent ambiguity + """ + # return "file://" + hostname + os.getcwd() + "/" + return "file://" + _fixslash(os.getcwd()) + "/" + + +def _fixslash(s): + """Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" + s = s.replace("\\", "/") + if s[0] != "/" and s[1] == ":": + s = s[2:] # @@@ Hack when drive letter present + return s + + +CONTEXT = 0 +PRED = 1 +SUBJ = 2 +OBJ = 3 + +PARTS = PRED, SUBJ, OBJ +ALL4 = CONTEXT, PRED, SUBJ, OBJ + +SYMBOL = 0 +FORMULA = 1 +LITERAL = 2 +LITERAL_DT = 21 +LITERAL_LANG = 22 +ANONYMOUS = 3 +XMLLITERAL = 25 + +Logic_NS = "http://www.w3.org/2000/10/swap/log#" +NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging +forSomeSym = Logic_NS + "forSome" +forAllSym = Logic_NS + "forAll" + +RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" +RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +OWL_NS = "http://www.w3.org/2002/07/owl#" +DAML_sameAs_URI = OWL_NS + "sameAs" 
+parsesTo_URI = Logic_NS + "parsesTo" +RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/" + +List_NS = RDF_NS_URI # From 20030808 +_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#" + +N3_first = (SYMBOL, List_NS + "first") +N3_rest = (SYMBOL, List_NS + "rest") +N3_li = (SYMBOL, List_NS + "li") +N3_nil = (SYMBOL, List_NS + "nil") +N3_List = (SYMBOL, List_NS + "List") +N3_Empty = (SYMBOL, List_NS + "Empty") + + +runNamespaceValue = None + + +def runNamespace(): + """Returns a URI suitable as a namespace for run-local objects""" + # @@@ include hostname (privacy?) (hash it?) + global runNamespaceValue + if runNamespaceValue is None: + runNamespaceValue = join(base(), _unique_id()) + "#" + return runNamespaceValue + + +nextu = 0 + +from rdflib import ConjunctiveGraph +from rdflib.parser import Parser +from .notation3 import SinkParser, RDFSink + +import re +import lark +import hashlib +from lark import ( + Lark, + Transformer, + Tree, +) +from lark.visitors import Visitor +from lark.reconstruct import Reconstructor + +from lark.lexer import ( + Token, +) + +# from pymantic.compat import ( +# binary_type, +# ) +# from pymantic.parsers.base import ( +# BaseParser, +# ) +# from pymantic.primitives import ( +# BlankNode, +# Literal, +# NamedNode, +# Triple, +# ) +# from pymantic.util import ( +# grouper, +# smart_urljoin, +# decode_literal, +# ) + +grammar = r"""trig_doc: (directive | block)* +?statement: directive | triples "." | quotedtriples "." +block: triplesorgraph | wrappedgraph | triples2 | "GRAPH" labelorsubject wrappedgraph +triplesorgraph: labelorsubject (wrappedgraph | predicate_object_list ".") | quotation predicate_object_list "." +triples2: blank_node_property_list predicate_object_list? "." | collection predicate_object_list "." +wrappedgraph: "{" triplesblock? "}" +triplesblock: triples ("." triplesblock?)? | quotedtriples ("." triplesblock?)? 
+labelorsubject: iri | blank_node +directive: prefix_id | base | sparql_prefix | sparql_base +prefix_id: "@prefix" PNAME_NS IRIREF "." +base: BASE_DIRECTIVE IRIREF "." +sparql_base: /BASE/i IRIREF +sparql_prefix: /PREFIX/i PNAME_NS IRIREF +triples: subject predicate_object_list + | blank_node_property_list predicate_object_list? +insidequotation: qtsubject verb qtobject +quotedtriples: triples compoundanno +predicate_object_list: verb object_list (";" (verb object_list)?)* +?object_list: object ("," object )* +?verb: predicate | /a/ +?subject: iri | blank_node | collection | quotation +?predicate: iri +?object: iri | blank_node | collection | blank_node_property_list | literal | quotation +?literal: rdf_literal | numeric_literal | boolean_literal +?qtsubject: iri | blank_node | quotation +?qtobject: iri | blank_node | literal | quotation +ANGLEBRACKETL: "<<" +ANGLEBRACKETR: ">>" +quotation: ANGLEBRACKETL insidequotation ANGLEBRACKETR +COMPOUNDL: "{|" +COMPOUNDR: "|}" +compoundanno: COMPOUNDL predicate_object_list COMPOUNDR +blank_node_property_list: "[" predicate_object_list "]" +collection: "(" object* ")" +numeric_literal: INTEGER | DECIMAL | DOUBLE +rdf_literal: string (LANGTAG | "^^" iri)? +boolean_literal: /true|false/ +string: STRING_LITERAL_QUOTE + | STRING_LITERAL_SINGLE_QUOTE + | STRING_LITERAL_LONG_SINGLE_QUOTE + | STRING_LITERAL_LONG_QUOTE +iri: IRIREF | prefixed_name +prefixed_name: PNAME_LN | PNAME_NS +blank_node: BLANK_NODE_LABEL | ANON + +BASE_DIRECTIVE: "@base" +IRIREF: "<" (/[^\x00-\x20<>"{}|^`\\]/ | UCHAR)* ">" +PNAME_NS: PN_PREFIX? ":" +PNAME_LN: PNAME_NS PN_LOCAL +BLANK_NODE_LABEL: "_:" (PN_CHARS_U | /[0-9]/) ((PN_CHARS | ".")* PN_CHARS)? +LANGTAG: "@" /[a-zA-Z]+/ ("-" /[a-zA-Z0-9]+/)* +INTEGER: /[+-]?[0-9]+/ +DECIMAL: /[+-]?[0-9]*/ "." /[0-9]+/ +DOUBLE: /[+-]?/ (/[0-9]+/ "." /[0-9]*/ EXPONENT + | "." 
/[0-9]+/ EXPONENT | /[0-9]+/ EXPONENT) +EXPONENT: /[eE][+-]?[0-9]+/ +STRING_LITERAL_QUOTE: "\"" (/[^\x22\x5C\x0A\x0D]/ | ECHAR | UCHAR)* "\"" +STRING_LITERAL_SINGLE_QUOTE: "'" (/[^\x27\x5C\x0A\x0D]/ | ECHAR | UCHAR)* "'" +STRING_LITERAL_LONG_SINGLE_QUOTE: "'''" (/'|''/? (/[^'\\]/ | ECHAR | UCHAR))* "'''" +STRING_LITERAL_LONG_QUOTE: "\"\"\"" (/"|""/? (/[^"\\]/ | ECHAR | UCHAR))* "\"\"\"" +UCHAR: "\\u" HEX~4 | "\\U" HEX~8 +ECHAR: "\\" /[tbnrf"'\\]/ +WS: /[\x20\x09\x0D\x0A]/ +ANON: "[" WS* "]" +PN_CHARS_BASE: /[A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF]/ +PN_CHARS_U: PN_CHARS_BASE | "_" +PN_CHARS: PN_CHARS_U | /[\-0-9\u00B7\u0300-\u036F\u203F-\u2040]/ +PN_PREFIX: PN_CHARS_BASE ((PN_CHARS | ".")* PN_CHARS)? +PN_LOCAL: (PN_CHARS_U | ":" | /[0-9]/ | PLX) ((PN_CHARS | "." | ":" | PLX)* (PN_CHARS | ":" | PLX))? +PLX: PERCENT | PN_LOCAL_ESC +PERCENT: "%" HEX~2 +HEX: /[0-9A-Fa-f]/ +PN_LOCAL_ESC: "\\" /[_~\.\-!$&'()*+,;=\/?#@%]/ + +%ignore WS +COMMENT: "#" /[^\n]/* +%ignore COMMENT +""" + +trig_lark = Lark(grammar, start="trig_doc", parser="lalr", maybe_placeholders = False) + +from lark import Visitor, v_args +quotation_list = [] +quotation_dict = dict() +vblist = [] +quotationreif = [] +prefix_list = [] +quotationannolist = [] +constructors = "" +assertedtriplelist = [] +quoted_or_not = False +both_quoted_and_asserted = False + +def myHash(text:str): + return str(hashlib.md5(text.encode('utf-8')).hexdigest()) + +class FindVariables(Visitor): + def __init__(self): + super().__init__() + self.variable_list = [] + + def quotation(self, var): + qut = Reconstructor(trig_lark).reconstruct(var) + qut = qut.replace(";", "") + qut = qut.replace(" ", "") + if not (qut in quotation_list): + quotation_list.append(qut) + + vr = Reconstructor(trig_lark).reconstruct(var) + vr = vr.replace(";","") + + quotation_dict[qut] = str(myHash(qut)) + "RdfstarTriple" + 
qut_hash = ":" + str(myHash(qut)) + # try: + id = quotation_dict.get(vr) + for x in quotation_dict: + if x in vr: + vr = vr.replace(x, ":"+quotation_dict.get(x)) + vr = vr.replace("<<", "") + vr = vr.replace(">>", "") + output = vr.split(":") + output.pop(0) + oa1 = Reconstructor(trig_lark).reconstruct(var) + oa1 = oa1.replace(";","") + output.append(oa1) + if (not (output in quotationreif)): + quotationreif.append(output) + + def blank_node_property_list(self, var): + object_list = ((var.children[0]).children)[1].children + for x in range(0, len(object_list)): + try: + if object_list[x].data == 'quotation': + collection_quotation_reconstruct = Reconstructor(trig_lark).reconstruct(object_list[x]) + collection_quotation_reconstruct = collection_quotation_reconstruct.replace(";","") + t2 = quotation_dict[collection_quotation_reconstruct] + hasht2 = "_:" + t2 + object_list[x] = Tree('iri', [Tree('prefixed_name', [Token('PNAME_LN', hasht2)])]) + except Exception as ex: + object_list = ((var.children[0]).children)[1] + collection_quotation_reconstruct = Reconstructor(trig_lark).reconstruct(object_list) + collection_quotation_reconstruct = collection_quotation_reconstruct.replace(";","") + try: + t2 = quotation_dict[collection_quotation_reconstruct] + hasht2 = "_:" + t2 + ((var.children[0]).children)[1] = Tree('iri', [Tree('prefixed_name', [Token('PNAME_LN', hasht2)])]) + break + except Exception as ex2: + pass + + def collection(self, var): + for x in range(0, len(var.children)): + if var.children[x].data == 'quotation': + collection_quotation_reconstruct = Reconstructor(trig_lark).reconstruct(var.children[x]) + collection_quotation_reconstruct = collection_quotation_reconstruct.replace(";","") + t2 = quotation_dict[collection_quotation_reconstruct] + hasht2 = "_:" + t2 + var.children[x] = Tree('iri', [Tree('prefixed_name', [Token('PNAME_LN', hasht2)])]) + + def quotedtriples(self, var): + triple1 = None + subjecthash = "" + + for x in var.children: + if x.data == 
"triples": + triple1 = Reconstructor(trig_lark).reconstruct(x) + triple1 = triple1.replace(";","") + triple1 = "<<"+triple1+">>" + subjecthash = "_:" + str(myHash(triple1)) + "RdfstarTriple" + if not (triple1 in quotation_list): + quotation_list.append(triple1) + + quotation_dict[triple1] = str(myHash(triple1)) + "RdfstarTriple" + elif x.data == "compoundanno": + for y in x.children: + if (y != "{|") & (y!= "|}"): + count2 = 0 + quotationtriple = [] + for z in y.children: + count2+=1 + z2 = Reconstructor(trig_lark).reconstruct(z) + quotationtriple.append(z2) + if count2 ==2: + quotationtriple.insert(0, subjecthash) + quotationannolist.append(quotationtriple) + count2 = 0 + quotationtriple = [] + + def triples(self, var): + + appends1 = [] + tri = Reconstructor(trig_lark).reconstruct(var) + if ("[" in tri) and (not "RdfstarTriple" in tri) and (not "<<" in tri): + vblist.append([tri]) + else: + tri = tri.replace(";", "") + if not (tri in assertedtriplelist): + assertedtriplelist.append(tri) + for x in var.children: + if x.data == 'predicate_object_list': + xc = x.children + for y in xc: + try: + x2 = Reconstructor(trig_lark).reconstruct(y) + except: + appends1.pop(0) + appends1.append("standard reification") + appends1.append(Reconstructor(trig_lark).reconstruct(var)) + appends1.append(" . 
\n") + break + x2 = x2.replace(";","") + appends1.append(x2) + else: + anyquotationin = False + x1 = Reconstructor(trig_lark).reconstruct(x) + appends1.append(x1) + + if not (appends1 in vblist): + vblist.append(appends1) + + def insidequotation(self, var): + appends1 = [] + for x in var.children: + x1 = Reconstructor(trig_lark).reconstruct(x) + x1 = x1.replace(";","") + appends1.append(x1) + + if not (appends1 in vblist): + vblist.append(appends1) + + # def prefixed_name(self, children): + # print("prefixed_name") + # print("pn", self) + + def prefix_id(self, children): + print("prefix_id") + + def sparql_prefix(self, children): + print("sparql_prefix", children) + prefix_list.append(children) + + def base(self, children): + print("base") + base_directive, base_iriref = children + print("base", base_directive, base_iriref) + # Workaround for lalr parser token ambiguity in python 2.7 + if base_directive.startswith('@') and base_directive != '@base': + raise ValueError('Unexpected @base: ' + base_directive) + +def RDFstarParsings(rdfstarstring): + global quotationannolist, quotation_dict, vblist, quotationreif, prefix_list, constructors, assertedtriplelist, quoted_or_not, both_quoted_and_asserted + quotationannolist = [] + vblist = [] + quotationreif = [] + prefix_list = [] + constructors = "" + quoted_or_not = False + both_quoted_and_asserted = False + tree = trig_lark.parse(rdfstarstring) + at = FindVariables().visit(tree) + + for y in vblist: + for element_index in range(0, len(y)): + if (y[element_index][0] == "_") & (not (element_index == 0)): + y[element_index]=" "+y[element_index] + result = "".join(y) + result = result.replace(" ", "") + if result in assertedtriplelist: + test1 = "<<"+result+">>" + if test1 in quotation_list: + both_quoted_and_asserted = True + else: + both_quoted_and_asserted = False + quoted_or_not = False + else: + test2 = "<<"+result+">>" + if test2 in quotation_list: + both_quoted_and_asserted = False + quoted_or_not = True + else: + 
both_quoted_and_asserted = False + quoted_or_not = False + result = "<<"+result+">>" + if not (result in quotation_list): + for z in range(0,len(y)): + if "<<" in y[z]: + y[z] = y[z].replace(" ", "") + y[z] = "_:"+quotation_dict[y[z]] + myvalue = str(myHash(result)) + try: + subject = y[0] + predicate = y[1] + object = y[2] + except: + if len(y)==1: + result2 = y[0] + constructors+=result2 + constructors = constructors +".\n" + continue + if both_quoted_and_asserted: + next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + if quoted_or_not: + next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + + constructors+=next_rdf_object + else: + value = quotation_dict[result] + for z in range(0,len(y)): + if "<<" in y[z]: + y[z] = "_:"+quotation_dict[y[z]] + subject = y[0] + predicate = y[1] + object = y[2] + if both_quoted_and_asserted: + next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + if quoted_or_not: + next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + 
constructors+=next_rdf_object + + for z in quotationannolist: + result1 = "".join(z) + result1 = "<<"+result1+">>" + if result1 in quotation_list: + both_quoted_and_asserted = True + else: + both_quoted_and_asserted = False + quoted_or_not = False + value = str(myHash(result1)) + subject = z[0] + predicate = z[1] + object = z[2] + if both_quoted_and_asserted: + next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + if quoted_or_not: + next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + + constructors+=next_rdf_object + + for x in range(0, len(prefix_list)): + prefix_list[x] = Reconstructor(trig_lark).reconstruct(prefix_list[x]) + constructors = prefix_list[x]+"\n"+constructors + + constructors = "PREFIX rdfstar: \n"+constructors + + constructors = "PREFIX rdf: \n"+constructors + + + + if not (("PREFIX : " in constructors) or ("PREFIX:" in constructors)): + constructors = "PREFIX : \n"+constructors + + if "PREFIX:" in constructors: + constructors = constructors.replace("PREFIX:", "PREFIX :") + + print("input after preprocessing: ", constructors) + constructors = bytes(constructors, 'utf-8') + return constructors + +def becauseSubGraph(*args, **kwargs): + pass + + +def uniqueURI(): + """A unique URI""" + global nextu + nextu += 1 + return runNamespace() + "u_" + str(nextu) + +tracking = False +chatty_flag = 50 + +# from why import BecauseOfData, becauseSubexpression + +def BecauseOfData(*args, **kargs): + # print args, kargs + pass + + +def 
becauseSubexpression(*args, **kargs): + # print args, kargs + pass + + +N3_forSome_URI = forSomeSym +N3_forAll_URI = forAllSym + +# Magic resources we know about + +ADDED_HASH = "#" # Stop where we use this in case we want to remove it! +# This is the hash on namespace URIs + +RDF_type = (SYMBOL, RDF_type_URI) +DAML_sameAs = (SYMBOL, DAML_sameAs_URI) + +LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies" + +BOOLEAN_DATATYPE = _XSD_PFX + "boolean" +DECIMAL_DATATYPE = _XSD_PFX + "decimal" +DOUBLE_DATATYPE = _XSD_PFX + "double" +FLOAT_DATATYPE = _XSD_PFX + "float" +INTEGER_DATATYPE = _XSD_PFX + "integer" + +option_noregen = 0 # If set, do not regenerate genids on output + +# @@ I18n - the notname chars need extending for well known unicode non-text +# characters. The XML spec switched to assuming unknown things were name +# characters. +# _namechars = string.lowercase + string.uppercase + string.digits + '_-' +_notQNameChars = set("\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~") # else valid qname :-/ +_notKeywordsChars = _notQNameChars | {"."} +_notNameChars = _notQNameChars | {":"} # Assume anything else valid name :-/ +_rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + +hexChars = set("ABCDEFabcdef0123456789") +escapeChars = set("(_~.-!$&'()*+,;=/?#@%)") # valid for \ escapes in localnames +numberChars = set("0123456789-") +numberCharsPlus = numberChars | {"+", "."} + + +def unicodeExpand(m): + try: + return chr(int(m.group(1), 16)) + except: + raise Exception("Invalid unicode code point: " + m.group(1)) + + +unicodeEscape4 = re.compile(r"\\u([0-9a-fA-F]{4})") +unicodeEscape8 = re.compile(r"\\U([0-9a-fA-F]{8})") + + +N3CommentCharacter = "#" # For unix script # ! compatibility + +# Parse string to sink +# +# Regular expressions: +eol = re.compile(r"[ \t]*(#[^\n]*)?\r?\n") # end of line, poss. w/comment +eof = re.compile(r"[ \t]*(#[^\n]*)?$") # end of file, poss. 
w/comment +ws = re.compile(r"[ \t]*") # Whitespace not including NL +signed_integer = re.compile(r"[-+]?[0-9]+") # integer +integer_syntax = re.compile(r"[-+]?[0-9]+") +decimal_syntax = re.compile(r"[-+]?[0-9]*\.[0-9]+") +exponent_syntax = re.compile( + r"[-+]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)(?:e|E)[-+]?[0-9]+" +) +digitstring = re.compile(r"[0-9]+") # Unsigned integer +interesting = re.compile(r"""[\\\r\n\"\']""") +langcode = re.compile(r"[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*") + +quoted_triple_list = [] +class SinkParser: + def __init__( + self, + store: "RDFSink", + openFormula: Optional["Formula"] = None, + thisDoc: str = "", + baseURI: Optional[str] = None, + genPrefix: str = "", + why: Optional[Callable[[], None]] = None, + turtle: bool = False, + ): + """note: namespace names should *not* end in # ; + the # will get added during qname processing""" + + self._bindings = {} + if thisDoc != "": + assert ":" in thisDoc, "Document URI not absolute: <%s>" % thisDoc + self._bindings[""] = thisDoc + "#" # default + + self._store = store + if genPrefix: + # TODO FIXME: there is no function named setGenPrefix + store.setGenPrefix(genPrefix) # type: ignore[attr-defined] # pass it on + + self._thisDoc = thisDoc + self.lines = 0 # for error handling + self.startOfLine = 0 # For calculating character number + self._genPrefix = genPrefix + self.keywords = ["a", "this", "bind", "has", "is", "of", "true", "false"] + self.keywordsSet = 0 # Then only can others be considered qnames + self._anonymousNodes: Dict[str, Node] = {} + self._rdfstartripleNodes: Dict[str, Node] = {} + # Dict of anon nodes already declared ln: Term + self._variables: Dict[Identifier, Identifier] = {} + self._parentVariables: Dict[Identifier, Identifier] = {} + self._reason = why # Why the parser was asked to parse this + + self.turtle = turtle # raise exception when encountering N3 extensions + # Turtle allows single or double quotes around strings, whereas N3 + # only allows double quotes. 
+ self.string_delimiters = ('"', "'") if turtle else ('"',) + + self._reason2 = None # Why these triples + # was: diag.tracking + if tracking: + self._reason2 = BecauseOfData( + store.newSymbol(thisDoc), because=self._reason + ) + + self._baseURI: Optional[str] + if baseURI: + self._baseURI = baseURI + else: + if thisDoc: + self._baseURI = thisDoc + else: + self._baseURI = None + + assert not self._baseURI or ":" in self._baseURI + + if not self._genPrefix: + if self._thisDoc: + self._genPrefix = self._thisDoc + "#_g" + else: + self._genPrefix = uniqueURI() + + self._formula: Formula + if openFormula is None and not turtle: + if self._thisDoc: + # TODO FIXME: store.newFormula does not take any arguments + self._formula = store.newFormula(thisDoc + "#_formula") # type: ignore[call-arg] + else: + self._formula = store.newFormula() + else: + self._formula = openFormula # type: ignore[assignment] + + self._context = self._formula + self._parentContext: Optional[Formula] = None + + def here(self, i: int) -> str: + """String generated from position in file + This is for repeatability when referring people to bnodes in a document. + This has diagnostic uses less formally, as it should point one to which + bnode the arbitrary identifier actually is. It gives the + line and character number of the '[' charcacter or path character + which introduced the blank node. The first blank node is boringly + _L1C1. 
It used to be used only for tracking, but for tests in general + it makes the canonical ordering of bnodes repeatable.""" + + return "%s_L%iC%i" % (self._genPrefix, self.lines, i - self.startOfLine + 1) + + def formula(self): + return self._formula + + def loadStream(self, stream: Union[IO[str], IO[bytes]]) -> Optional["Formula"]: + return self.loadBuf(stream.read()) # Not ideal + + def loadBuf(self, buf: Union[str, bytes]): + """Parses a buffer and returns its top level formula""" + self.startDoc() + + self.feed(buf) + return self.endDoc() # self._formula + + def feed(self, octets: Union[str, bytes]): + """Feed an octet stream to the parser + if BadSyntax is raised, the string + passed in the exception object is the + remainder after any statements have been parsed. + So if there is more data to feed to the + parser, it should be straightforward to recover.""" + + if not isinstance(octets, str): + s = octets.decode("utf-8") + # NB already decoded, so \ufeff + if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode("utf-8"): + s = s[1:] + else: + s = octets + + i = 0 + while i >= 0: + j = self.skipSpace(s, i) + if j < 0: + return + i = self.directiveOrStatement(s, j) + if i < 0: + # print("# next char: %s" % s) + self.BadSyntax(s, j, "expected directive or statement") + + def directiveOrStatement(self, argstr: str, h: int) -> int: + + i = self.skipSpace(argstr, h) + if i < 0: + return i # EOF + + if self.turtle: + j = self.sparqlDirective(argstr, i) + if j >= 0: + return j + + j = self.directive(argstr, i) + if j >= 0: + return self.checkDot(argstr, j) + j = self.statement(argstr, i) + if j >= 0: + return self.checkDot(argstr, j) + + return j + + # @@I18N + # _namechars = string.lowercase + string.uppercase + string.digits + '_-' + + def tok(self, tok: str, argstr: str, i: int, colon: bool = False): + """Check for keyword. Space must have been stripped on entry and + we must not be at end of file. 
+ if colon, then keyword followed by colon is ok + (@prefix: is ok, rdf:type shortcut a must be followed by ws) + """ + + assert tok[0] not in _notNameChars # not for punctuation + if argstr[i] == "@": + i += 1 + else: + if tok not in self.keywords: + return -1 # No, this has neither keywords declaration nor "@" + + i_plus_len_tok = i + len(tok) + if ( + argstr[i:i_plus_len_tok] == tok + and (argstr[i_plus_len_tok] in _notKeywordsChars) + or (colon and argstr[i_plus_len_tok] == ":") + ): + return i_plus_len_tok + else: + return -1 + + def sparqlTok(self, tok: str, argstr: str, i: int) -> int: + """Check for SPARQL keyword. Space must have been stripped on entry + and we must not be at end of file. + Case insensitive and not preceded by @ + """ + + assert tok[0] not in _notNameChars # not for punctuation + + len_tok = len(tok) + if argstr[i : i + len_tok].lower() == tok.lower() and ( + argstr[i + len_tok] in _notQNameChars + ): + i += len_tok + return i + else: + return -1 + + def directive(self, argstr: str, i: int) -> int: + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + res: typing.List[Any] = [] + + j = self.tok("bind", argstr, i) # implied "#". Obsolete. 
+ if j > 0: + self.BadSyntax(argstr, i, "keyword bind is obsolete: use @prefix") + + j = self.tok("keywords", argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'keywords' when in Turtle mode.") + + i = self.commaSeparatedList(argstr, j, res, self.bareWord) + if i < 0: + self.BadSyntax( + argstr, i, "'@keywords' needs comma separated list of words" + ) + self.setKeywords(res[:]) + return i + + j = self.tok("forAll", argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'forAll' when in Turtle mode.") + + i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) + if i < 0: + self.BadSyntax(argstr, i, "Bad variable list after @forAll") + for x in res: + # self._context.declareUniversal(x) + if x not in self._variables or x in self._parentVariables: + self._variables[x] = self._context.newUniversal(x) + return i + + j = self.tok("forSome", argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'forSome' when in Turtle mode.") + + i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) + if i < 0: + self.BadSyntax(argstr, i, "Bad variable list after @forSome") + for x in res: + self._context.declareExistential(x) + return i + + j = self.tok("prefix", argstr, i, colon=True) # no implied "#" + if j >= 0: + t: typing.List[Any] = [] + i = self.qname(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, "expected qname after @prefix") + j = self.uri_ref2(argstr, i, t) + if j < 0: + self.BadSyntax(argstr, i, "expected after @prefix _qname_") + ns = self.uriOf(t[1]) + + if self._baseURI: + ns = join(self._baseURI, ns) + elif ":" not in ns: + self.BadSyntax( + argstr, + j, + f"With no base URI, cannot use relative URI in @prefix <{ns}>", + ) + assert ":" in ns # must be absolute + self._bindings[t[0][0]] = ns + self.bind(t[0][0], hexify(ns)) + return j + + j = self.tok("base", argstr, i) # Added 2007/7/7 + if j >= 0: + t = [] + i = self.uri_ref2(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, 
"expected after @base ") + ns = self.uriOf(t[0]) + + if self._baseURI: + ns = join(self._baseURI, ns) + else: + self.BadSyntax( + argstr, + j, + "With no previous base URI, cannot use " + + "relative URI in @base <" + + ns + + ">", + ) + assert ":" in ns # must be absolute + self._baseURI = ns + return i + + return -1 # Not a directive, could be something else. + + def sparqlDirective(self, argstr: str, i: int): + + """ + turtle and trig support BASE/PREFIX without @ and without + terminating . + """ + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + + j = self.sparqlTok("PREFIX", argstr, i) + if j >= 0: + t: typing.List[Any] = [] + i = self.qname(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, "expected qname after @prefix") + j = self.uri_ref2(argstr, i, t) + if j < 0: + self.BadSyntax(argstr, i, "expected after @prefix _qname_") + ns = self.uriOf(t[1]) + + if self._baseURI: + ns = join(self._baseURI, ns) + elif ":" not in ns: + self.BadSyntax( + argstr, + j, + "With no base URI, cannot use " + + "relative URI in @prefix <" + + ns + + ">", + ) + assert ":" in ns # must be absolute + self._bindings[t[0][0]] = ns + self.bind(t[0][0], hexify(ns)) + return j + + j = self.sparqlTok("BASE", argstr, i) + if j >= 0: + t = [] + i = self.uri_ref2(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, "expected after @base ") + ns = self.uriOf(t[0]) + + if self._baseURI: + ns = join(self._baseURI, ns) + else: + self.BadSyntax( + argstr, + j, + "With no previous base URI, cannot use " + + "relative URI in @base <" + + ns + + ">", + ) + assert ":" in ns # must be absolute + self._baseURI = ns + return i + + return -1 # Not a directive, could be something else. 
+ + def bind(self, qn: str, uri: bytes) -> None: + assert isinstance(uri, bytes), "Any unicode must be %x-encoded already" + if qn == "": + self._store.setDefaultNamespace(uri) + else: + self._store.bind(qn, uri) + + def setKeywords(self, k: Optional[typing.List[str]]): + """Takes a list of strings""" + if k is None: + self.keywordsSet = 0 + else: + self.keywords = k + self.keywordsSet = 1 + + def startDoc(self) -> None: + # was: self._store.startDoc() + self._store.startDoc(self._formula) + + def endDoc(self) -> Optional["Formula"]: + """Signal end of document and stop parsing. returns formula""" + self._store.endDoc(self._formula) # don't canonicalize yet + return self._formula + + def makeStatement(self, quadruple): + # $$$$$$$$$$$$$$$$$$$$$ + # print "# Parser output: ", `quadruple` + self._store.makeStatement(quadruple, why=self._reason2) + + def makerdfstarStatement(self, quadruple): + # $$$$$$$$$$$$$$$$$$$$$ + # print "# Parser output: ", `quadruple` + self._store.makerdfstarStatement(quadruple, why=self._reason2) + + def statement(self, argstr: str, i: int) -> int: + r: typing.List[Any] = [] + i = self.object(argstr, i, r) # Allow literal for subject - extends RDF + if i < 0: + return i + + j = self.property_list(argstr, i, r[0]) + + if j < 0: + self.BadSyntax(argstr, i, "expected propertylist") + return j + + def subject(self, argstr: str, i: int, res: typing.List[Any]) -> int: + return self.item(argstr, i, res) + + def verb(self, argstr: str, i: int, res: typing.List[Any]) -> int: + """has _prop_ + is _prop_ of + a + = + _prop_ + >- prop -> + <- prop -< + _operator_""" + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + + r: typing.List[Any] = [] + + j = self.tok("has", argstr, i) + if j >= 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'has' keyword in Turtle mode") + + i = self.prop(argstr, j, r) + if i < 0: + self.BadSyntax(argstr, j, "expected property after 'has'") + res.append(("->", r[0])) + return i + + j = self.tok("is", 
argstr, i) + if j >= 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'is' keyword in Turtle mode") + + i = self.prop(argstr, j, r) + if i < 0: + self.BadSyntax(argstr, j, "expected after 'is'") + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax( + argstr, i, "End of file found, expected property after 'is'" + ) + i = j + j = self.tok("of", argstr, i) + if j < 0: + self.BadSyntax(argstr, i, "expected 'of' after 'is' ") + res.append(("<-", r[0])) + return j + + j = self.tok("a", argstr, i) + if j >= 0: + res.append(("->", RDF_type)) + return j + + if argstr[i : i + 2] == "<=": + if self.turtle: + self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. ") + + res.append(("<-", self._store.newSymbol(Logic_NS + "implies"))) + return i + 2 + + if argstr[i] == "=": + if self.turtle: + self.BadSyntax(argstr, i, "Found '=' in Turtle mode") + if argstr[i + 1] == ">": + res.append(("->", self._store.newSymbol(Logic_NS + "implies"))) + return i + 2 + res.append(("->", DAML_sameAs)) + return i + 1 + + if argstr[i : i + 2] == ":=": + if self.turtle: + self.BadSyntax(argstr, i, "Found ':=' in Turtle mode") + + # patch file relates two formulae, uses this @@ really? + res.append(("->", Logic_NS + "becomes")) + return i + 2 + + j = self.prop(argstr, i, r) + if j >= 0: + res.append(("->", r[0])) + return j + + if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-": + self.BadSyntax(argstr, j, ">- ... -> syntax is obsolete.") + + return -1 + + def prop(self, argstr: str, i: int, res): + return self.item(argstr, i, res) + + def item(self, argstr: str, i, res): + return self.path(argstr, i, res) + + def blankNode(self, uri=None): + return self._store.newBlankNode(self._context, uri, why=self._reason2) + + def path(self, argstr: str, i: int, res): + """Parse the path production.""" + j = self.nodeOrLiteral(argstr, i, res) + if j < 0: + return j # nope + + while argstr[j] in {"!", "^"}: # no spaces, must follow exactly (?) 
+ ch = argstr[j] + subj = res.pop() + obj = self.blankNode(uri=self.here(j)) + j = self.node(argstr, j + 1, res) + if j < 0: + self.BadSyntax(argstr, j, "EOF found in middle of path syntax") + pred = res.pop() + if ch == "^": # Reverse traverse + self.makeStatement((self._context, pred, obj, subj)) + else: + self.makeStatement((self._context, pred, subj, obj)) + res.append(obj) + return j + + def anonymousNode(self, ln: str): + """Remember or generate a term for one of these _: anonymous nodes""" + if ("RdfstarTriple" in ln): + term = self._rdfstartripleNodes.get(ln, None) + if term is not None: + return term + term = self._store.newRdfstarTriple(self._context, why=self._reason2, hashvalue = ln) + self._rdfstartripleNodes[ln] = term + return term + term = self._anonymousNodes.get(ln, None) + if term is not None: + return term + term = self._store.newBlankNode(self._context, why=self._reason2) + self._anonymousNodes[ln] = term + return term + + def node(self, argstr: str, i: int, res, subjectAlready=None): + """Parse the production. + Space is now skipped once at the beginning + instead of in multiple calls to self.skipSpace(). + """ + subj = subjectAlready + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + i = j + ch = argstr[i] # Quick 1-character checks first: + + if ch == "[": + bnodeID = self.here(i) + j = self.skipSpace(argstr, i + 1) + if j < 0: + self.BadSyntax(argstr, i, "EOF after '['") + # Hack for "is" binding name to anon node + if argstr[j] == "=": + if self.turtle: + self.BadSyntax( + argstr, j, "Found '[=' or '[ =' when in turtle mode." 
+ ) + i = j + 1 + objs: typing.List[Any] = [] + j = self.objectList(argstr, i, objs) + if j >= 0: + subj = objs[0] + if len(objs) > 1: + for obj in objs: + self.makeStatement((self._context, DAML_sameAs, subj, obj)) + j = self.skipSpace(argstr, j) + if j < 0: + self.BadSyntax( + argstr, i, "EOF when objectList expected after [ = " + ) + if argstr[j] == ";": + j += 1 + else: + self.BadSyntax(argstr, i, "objectList expected after [= ") + + if subj is None: + subj = self.blankNode(uri=bnodeID) + i = self.property_list(argstr, j, subj) + if i < 0: + self.BadSyntax(argstr, j, "property_list expected") + + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax( + argstr, i, "EOF when ']' expected after [ " + ) + if argstr[j] != "]": + self.BadSyntax(argstr, j, "']' expected") + res.append(subj) + return j + 1 + + if not self.turtle and ch == "{": + # if self.turtle: + # self.BadSyntax(argstr, i, + # "found '{' while in Turtle mode, Formulas not supported!") + ch2 = argstr[i + 1] + if ch2 == "$": + # a set + i += 1 + j = i + 1 + List = [] + first_run = True + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "needed '$}', found end.") + if argstr[i : i + 2] == "$}": + j = i + 2 + break + + if not first_run: + if argstr[i] == ",": + i += 1 + else: + self.BadSyntax(argstr, i, "expected: ','") + else: + first_run = False + + item: typing.List[Any] = [] + j = self.item(argstr, i, item) # @@@@@ should be path, was object + if j < 0: + self.BadSyntax(argstr, i, "expected item in set or '$}'") + List.append(self._store.intern(item[0])) + res.append(self._store.newSet(List, self._context)) + return j + else: + # parse a formula + j = i + 1 + oldParentContext = self._parentContext + self._parentContext = self._context + parentAnonymousNodes = self._anonymousNodes + grandParentVariables = self._parentVariables + self._parentVariables = self._variables + self._anonymousNodes = {} + self._variables = self._variables.copy() + reason2 = self._reason2 
+ self._reason2 = becauseSubexpression + if subj is None: + subj = self._store.newFormula() + self._context = subj + + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "needed '}', found end.") + + if argstr[i] == "}": + j = i + 1 + break + j = self.directiveOrStatement(argstr, i) + if j < 0: + self.BadSyntax(argstr, i, "expected statement or '}'") + + self._anonymousNodes = parentAnonymousNodes + self._variables = self._parentVariables + self._parentVariables = grandParentVariables + self._context = self._parentContext + self._reason2 = reason2 + self._parentContext = oldParentContext + res.append(subj.close()) # No use until closed + return j + + if ch == "(": + thing_type = self._store.newList + ch2 = argstr[i + 1] + if ch2 == "$": + thing_type = self._store.newSet + i += 1 + j = i + 1 + + List = [] + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "needed ')', found end.") + if argstr[i] == ")": + j = i + 1 + break + + item = [] + j = self.item(argstr, i, item) # @@@@@ should be path, was object + if j < 0: + self.BadSyntax(argstr, i, "expected item in list or ')'") + List.append(self._store.intern(item[0])) + res.append(thing_type(List, self._context)) + return j + + j = self.tok("this", argstr, i) # This context + if j >= 0: + self.BadSyntax( + argstr, + i, + "Keyword 'this' was ancient N3. Now use " + + "@forSome and @forAll keywords.", + ) + + # booleans + j = self.tok("true", argstr, i) + if j >= 0: + res.append(True) + return j + j = self.tok("false", argstr, i) + if j >= 0: + res.append(False) + return j + + if subj is None: # If this can be a named node, then check for a name. 
+ j = self.uri_ref2(argstr, i, res) + if j >= 0: + return j + + return -1 + + def addingquotedRdfstarTriple(self, quoted_triple_list, dira): + if quoted_triple_list[0] == rdflib.term.URIRef('https://w3id.org/rdf-star/AssertedStatement'): + if quoted_triple_list[1] == rdflib.term.URIRef('https://w3id.org/rdf-star/QuotedStatement'): + if dira == "->": + self.makeStatement((self._context, quoted_triple_list[4], quoted_triple_list[3], quoted_triple_list[5])) + quoted_triple_list[2].setSubject(quoted_triple_list[3]) + quoted_triple_list[2].setPredicate(quoted_triple_list[4]) + quoted_triple_list[2].setObject(quoted_triple_list[5]) + + else: + self.makeStatement((self._context, quoted_triple_list[4], quoted_triple_list[5], quoted_triple_list[3])) + quoted_triple_list[2].setSubject(quoted_triple_list[4]) + quoted_triple_list[2].setPredicate(quoted_triple_list[5]) + quoted_triple_list[2].setObject(quoted_triple_list[6]) + + else: + if dira == "->": + self.makeStatement((self._context, quoted_triple_list[2], quoted_triple_list[1], quoted_triple_list[3])) + else: + self.makeStatement((self._context, quoted_triple_list[2], quoted_triple_list[3], quoted_triple_list[1])) + else: + if dira == "->": + quoted_triple_list[1].setSubject(quoted_triple_list[2]) + quoted_triple_list[1].setPredicate(quoted_triple_list[3]) + quoted_triple_list[1].setObject(quoted_triple_list[4]) + + else: + quoted_triple_list[1].setSubject(quoted_triple_list[2]) + quoted_triple_list[1].setPredicate(quoted_triple_list[3]) + quoted_triple_list[1].setObject(quoted_triple_list[4]) + + def property_list(self, argstr: str, i: int, subj): + """Parse property list + Leaves the terminating punctuation in the buffer + """ + global quoted_triple_list + while 1: + while 1: # skip repeat ; + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax( + argstr, i, "EOF found when expected verb in property list" + ) + if argstr[j] != ";": + break + i = j + 1 + + if argstr[j : j + 2] == ":-": + if self.turtle: + 
self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") + i = j + 2 + res: typing.List[Any] = [] + j = self.node(argstr, i, res, subj) + if j < 0: + self.BadSyntax(argstr, i, "bad {} or () or [] node after :- ") + i = j + continue + i = j + v: typing.List[Any] = [] + j = self.verb(argstr, i, v) + if j <= 0: + return i # void but valid + + objs: typing.List[Any] = [] + + i = self.objectList(argstr, j, objs) + if i < 0: + self.BadSyntax(argstr, j, "objectList expected") + + for obj in objs: + dira, sym = v[0] + if "RdfstarTriple" in subj: + + if "rdf-star" in str(obj): + if len(quoted_triple_list) > 2: + quoted_triple_list = [] + quoted_triple_list.append(obj) + if (rdflib.term.URIRef('https://w3id.org/rdf-star/QuotedStatement') in quoted_triple_list) & (not (subj in quoted_triple_list)): + quoted_triple_list.append(subj) + if "#object" in sym: + + self.addingquotedRdfstarTriple(quoted_triple_list, dira) + else: + if dira == "->": + + self.makeStatement((self._context, sym, subj, obj)) + else: + self.makeStatement((self._context, sym, obj, subj)) + + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, j, "EOF found in list of objects") + if argstr[i] != ";": + return i + i += 1 # skip semicolon and continue + + def commaSeparatedList(self, argstr: str, j, res, what): + """return value: -1 bad syntax; >1 new position in argstr + res has things found appended + """ + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "EOF found expecting comma sep list") + if argstr[i] == ".": + return j # empty list is OK + i = what(argstr, i, res) + if i < 0: + return -1 + + while 1: + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + ch = argstr[j] + if ch != ",": + if ch != ".": + return -1 + return j # Found but not swallowed "." 
+ i = what(argstr, j + 1, res) + if i < 0: + self.BadSyntax(argstr, i, "bad list content") + + def objectList(self, argstr: str, i: int, res: typing.List[Any]) -> int: + i = self.object(argstr, i, res) + if i < 0: + return -1 + while 1: + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, j, "EOF found after object") + if argstr[j] != ",": + return j # Found something else! + i = self.object(argstr, j + 1, res) + if i < 0: + return i + + def checkDot(self, argstr: str, i: int): + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + ch = argstr[j] + if ch == ".": + return j + 1 # skip + if ch == "}": + return j # don't skip it + if ch == "]": + return j + self.BadSyntax(argstr, j, "expected '.' or '}' or ']' at end of statement") + + def uri_ref2(self, argstr: str, i: int, res): + """Generate uri from n3 representation. + Note that the RDF convention of directly concatenating + NS and local name is now used though I prefer inserting a '#' + to make the namesapces look more like what XML folks expect. + """ + qn: typing.List[Any] = [] + j = self.qname(argstr, i, qn) + if j >= 0: + pfx, ln = qn[0] + if pfx is None: + assert 0, "not used?" + ns = self._baseURI + ADDED_HASH # type: ignore[unreachable] + else: + try: + ns = self._bindings[pfx] + except KeyError: + if pfx == "_": # Magic prefix 2001/05/30, can be changed + res.append(self.anonymousNode(ln)) + return j + if not self.turtle and pfx == "": + ns = join(self._baseURI or "", "#") + else: + self.BadSyntax(argstr, i, 'Prefix "%s:" not bound' % (pfx)) + symb = self._store.newSymbol(ns + ln) + res.append(self._variables.get(symb, symb)) + return j + + i = self.skipSpace(argstr, i) + if i < 0: + return -1 + + if argstr[i] == "?": + v: typing.List[Any] = [] + j = self.variable(argstr, i, v) + if j > 0: # Forget variables as a class, only in context. 
+ res.append(v[0]) + return j + return -1 + + elif argstr[i] == "<": + st = i + 1 + i = argstr.find(">", st) + if i >= 0: + uref = argstr[st:i] # the join should dealt with "": + + # expand unicode escapes + uref = unicodeEscape8.sub(unicodeExpand, uref) + uref = unicodeEscape4.sub(unicodeExpand, uref) + + if self._baseURI: + uref = join(self._baseURI, uref) # was: uripath.join + else: + assert ( + ":" in uref + ), "With no base URI, cannot deal with relative URIs" + if argstr[i - 1] == "#" and not uref[-1:] == "#": + uref += "#" # She meant it! Weirdness in urlparse? + symb = self._store.newSymbol(uref) + res.append(self._variables.get(symb, symb)) + return i + 1 + self.BadSyntax(argstr, j, "unterminated URI reference") + + elif self.keywordsSet: + v = [] + j = self.bareWord(argstr, i, v) + if j < 0: + return -1 # Forget variables as a class, only in context. + if v[0] in self.keywords: + self.BadSyntax(argstr, i, 'Keyword "%s" not allowed here.' % v[0]) + res.append(self._store.newSymbol(self._bindings[""] + v[0])) + return j + else: + return -1 + + def skipSpace(self, argstr: str, i: int): + """Skip white space, newlines and comments. + return -1 if EOF, else position of first non-ws character""" + + # Most common case is a non-commented line starting with few spaces and tabs. 
+ try: + while True: + ch = argstr[i] + if ch in {" ", "\t"}: + i += 1 + continue + elif ch not in {"#", "\r", "\n"}: + return i + break + except IndexError: + return -1 + + while 1: + m = eol.match(argstr, i) + if m is None: + break + self.lines += 1 + self.startOfLine = i = m.end() # Point to first character unmatched + m = ws.match(argstr, i) + if m is not None: + i = m.end() + m = eof.match(argstr, i) + return i if m is None else -1 + + def variable(self, argstr: str, i: int, res): + """?abc -> variable(:abc)""" + + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + + if argstr[j] != "?": + return -1 + j += 1 + i = j + if argstr[j] in numberChars: + self.BadSyntax(argstr, j, "Variable name can't start with '%s'" % argstr[j]) + len_argstr = len(argstr) + while i < len_argstr and argstr[i] not in _notKeywordsChars: + i += 1 + if self._parentContext is None: + varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) # type: ignore[operator] + if varURI not in self._variables: + self._variables[varURI] = self._context.newUniversal( + varURI, why=self._reason2 + ) + res.append(self._variables[varURI]) + return i + # @@ was: + # self.BadSyntax(argstr, j, + # "Can't use ?xxx syntax for variable in outermost level: %s" + # % argstr[j-1:i]) + varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) # type: ignore[operator] + if varURI not in self._parentVariables: + self._parentVariables[varURI] = self._parentContext.newUniversal( + varURI, why=self._reason2 + ) + res.append(self._parentVariables[varURI]) + return i + + def bareWord(self, argstr: str, i: int, res): + """abc -> :abc""" + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + + if argstr[j] in numberChars or argstr[j] in _notKeywordsChars: + return -1 + i = j + len_argstr = len(argstr) + while i < len_argstr and argstr[i] not in _notKeywordsChars: + i += 1 + res.append(argstr[j:i]) + return i + + def qname(self, argstr: str, i: int, res): + """ + xyz:def -> ('xyz', 'def') + If not 
in keywords and keywordsSet: def -> ('', 'def') + :def -> ('', 'def') + """ + + i = self.skipSpace(argstr, i) + if i < 0: + return -1 + + c = argstr[i] + if c in numberCharsPlus: + return -1 + len_argstr = len(argstr) + if c not in _notNameChars: + j = i + i += 1 + + try: + while argstr[i] not in _notNameChars: + i += 1 + except IndexError: + pass # Very rare. + + if argstr[i - 1] == ".": # qname cannot end with "." + i -= 1 + if i == j: + return -1 + ln = argstr[j:i] + + else: # First character is non-alpha + ln = "" # Was: None - TBL (why? useful?) + + if i < len_argstr and argstr[i] == ":": + pfx = ln + # bnodes names have different rules + if pfx == "_": + allowedChars = _notNameChars + else: + allowedChars = _notQNameChars + + i += 1 + lastslash = False + start = i + ln = "" + while i < len_argstr: + c = argstr[i] + if c == "\\" and not lastslash: # Very rare. + lastslash = True + if start < i: + ln += argstr[start:i] + start = i + 1 + elif c not in allowedChars or lastslash: # Most common case is "a-zA-Z" + if lastslash: + if c not in escapeChars: + raise BadSyntax( + self._thisDoc, + self.lines, + argstr, + i, + "illegal escape " + c, + ) + elif c == "%": # Very rare. + if ( + argstr[i + 1] not in hexChars + or argstr[i + 2] not in hexChars + ): + raise BadSyntax( + self._thisDoc, + self.lines, + argstr, + i, + "illegal hex escape " + c, + ) + lastslash = False + else: + break + i += 1 + + if lastslash: + raise BadSyntax( + self._thisDoc, self.lines, argstr, i, "qname cannot end with \\" + ) + + if argstr[i - 1] == ".": + # localname cannot end in . 
+ if len(ln) == 0 and start == i: + return -1 + i -= 1 + + if start < i: + ln += argstr[start:i] + + res.append((pfx, ln)) + return i + + else: # delimiter was not ":" + if ln and self.keywordsSet and ln not in self.keywords: + res.append(("", ln)) + return i + return -1 + + def object(self, argstr: str, i: int, res): + j = self.subject(argstr, i, res) + if j >= 0: + return j + else: + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + else: + i = j + + ch = argstr[i] + if ch in self.string_delimiters: + ch_three = ch * 3 + if argstr[i : i + 3] == ch_three: + delim = ch_three + i += 3 + else: + delim = ch + i += 1 + + j, s = self.strconst(argstr, i, delim) + + res.append(self._store.newLiteral(s)) # type: ignore[call-arg] # TODO FIXME + return j + else: + return -1 + + def nodeOrLiteral(self, argstr: str, i: int, res): + j = self.node(argstr, i, res) + startline = self.lines # Remember where for error messages + if j >= 0: + return j + else: + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + else: + i = j + + ch = argstr[i] + if ch in numberCharsPlus: + m = exponent_syntax.match(argstr, i) + if m: + j = m.end() + res.append(float(argstr[i:j])) + return j + + m = decimal_syntax.match(argstr, i) + if m: + j = m.end() + res.append(Decimal(argstr[i:j])) + return j + + m = integer_syntax.match(argstr, i) + if m: + j = m.end() + res.append(long_type(argstr[i:j])) + return j + + # return -1 ## or fall through? + + ch_three = ch * 3 + if ch in self.string_delimiters: + if argstr[i : i + 3] == ch_three: + delim = ch_three + i += 3 + else: + delim = ch + i += 1 + + dt = None + j, s = self.strconst(argstr, i, delim) + lang = None + if argstr[j] == "@": # Language? 
+ m = langcode.match(argstr, j + 1) + if m is None: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "Bad language code syntax on string " + "literal, after @", + ) + i = m.end() + lang = argstr[j + 1 : i] + j = i + if argstr[j : j + 2] == "^^": + res2: typing.List[Any] = [] + j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI + dt = res2[0] + res.append(self._store.newLiteral(s, dt, lang)) + return j + else: + return -1 + + def uriOf(self, sym): + if isinstance(sym, tuple): + return sym[1] # old system for --pipe + # return sym.uriref() # cwm api + return sym + + def strconst(self, argstr: str, i: int, delim): + """parse an N3 string constant delimited by delim. + return index, val + """ + delim1 = delim[0] + delim2, delim3, delim4, delim5 = delim1 * 2, delim1 * 3, delim1 * 4, delim1 * 5 + + j = i + ustr = "" # Empty unicode string + startline = self.lines # Remember where for error messages + len_argstr = len(argstr) + while j < len_argstr: + if argstr[j] == delim1: + if delim == delim1: # done when delim is " or ' + i = j + 1 + return i, ustr + if ( + delim == delim3 + ): # done when delim is """ or ''' and, respectively ... + if argstr[j : j + 5] == delim5: # ... we have "" or '' before + i = j + 5 + ustr += delim2 + return i, ustr + if argstr[j : j + 4] == delim4: # ... we have " or ' before + i = j + 4 + ustr += delim1 + return i, ustr + if argstr[j : j + 3] == delim3: # current " or ' is part of delim + i = j + 3 + return i, ustr + + # we are inside of the string and current char is " or ' + j += 1 + ustr += delim1 + continue + + m = interesting.search(argstr, j) # was argstr[j:]. + # Note for pos param to work, MUST be compiled ... re bug? 
+ assert m, "Quote expected in string at ^ in %s^%s" % ( + argstr[j - 20 : j], + argstr[j : j + 20], + ) # at least need a quote + + i = m.start() + try: + ustr += argstr[j:i] + except UnicodeError: + err = "" + for c in argstr[j:i]: + err = err + (" %02x" % ord(c)) + streason = sys.exc_info()[1].__str__() + raise BadSyntax( + self._thisDoc, + startline, + argstr, + j, + "Unicode error appending characters" + + " %s to string, because\n\t%s" % (err, streason), + ) + + # print "@@@ i = ",i, " j=",j, "m.end=", m.end() + + ch = argstr[i] + if ch == delim1: + j = i + continue + elif ch in {'"', "'"} and ch != delim1: + ustr += ch + j = i + 1 + continue + elif ch in {"\r", "\n"}: + if delim == delim1: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "newline found in string literal", + ) + self.lines += 1 + ustr += ch + j = i + 1 + self.startOfLine = j + + elif ch == "\\": + j = i + 1 + ch = argstr[j] # Will be empty if string ends + if not ch: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "unterminated string literal (2)", + ) + k = "abfrtvn\\\"'".find(ch) + if k >= 0: + uch = "\a\b\f\r\t\v\n\\\"'"[k] + ustr += uch + j += 1 + elif ch == "u": + j, ch = self.uEscape(argstr, j + 1, startline) + ustr += ch + elif ch == "U": + j, ch = self.UEscape(argstr, j + 1, startline) + ustr += ch + else: + self.BadSyntax(argstr, i, "bad escape") + + self.BadSyntax(argstr, i, "unterminated string literal") + + def _unicodeEscape(self, argstr: str, i, startline, reg, n, prefix): + if len(argstr) < i + n: + raise BadSyntax( + self._thisDoc, startline, argstr, i, "unterminated string literal(3)" + ) + try: + return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n]) + except: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "bad string literal hex escape: " + argstr[i : i + n], + ) + + def uEscape(self, argstr: str, i, startline): + return self._unicodeEscape(argstr, i, startline, unicodeEscape4, 4, "u") + + def 
UEscape(self, argstr: str, i, startline): + return self._unicodeEscape(argstr, i, startline, unicodeEscape8, 8, "U") + + def BadSyntax(self, argstr: str, i, msg): + raise BadSyntax(self._thisDoc, self.lines, argstr, i, msg) + + +# If we are going to do operators then they should generate +# [ is operator:plus of ( \1 \2 ) ] + + +class BadSyntax(SyntaxError): + def __init__(self, uri, lines, argstr, i, why): + self._str = argstr.encode("utf-8") # Better go back to strings for errors + self._i = i + self._why = why + self.lines = lines + self._uri = uri + + def __str__(self): + argstr = self._str + i = self._i + st = 0 + if i > 60: + pre = "..." + st = i - 60 + else: + pre = "" + if len(argstr) - i > 60: + post = "..." + else: + post = "" + + return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' % ( + self.lines + 1, + self._uri, + self._why, + pre, + argstr[st:i], + argstr[i : i + 60], + post, + ) + + @property + def message(self): + return str(self) + + +############################################################################### +class Formula(object): + number = 0 + + def __init__(self, parent): + self.uuid = uuid4().hex + self.counter = 0 + Formula.number += 1 + self.number = Formula.number + self.existentials = {} + self.universals = {} + + self.quotedgraph = QuotedGraph(store=parent.store, identifier=self.id()) + + def __str__(self): + return "_:Formula%s" % self.number + + def id(self): + return BNode("_:Formula%s" % self.number) + + def newBlankNode(self, uri=None, why=None): + if uri is None: + self.counter += 1 + bn = BNode("f%sb%s" % (self.uuid, self.counter)) + else: + bn = BNode(uri.split("#").pop().replace("_", "b")) + return bn + + def newRdfstarTriple(self, hashvalue, uri=None, why=None): + if uri is None: + # self.counter += 1 + rdfstartriple = RdfstarTriple(hashvalue = hashvalue) + else: + rdfstartriple = RdfstarTriple(hashvalue = hashvalue) + return rdfstartriple + + def newUniversal(self, uri, why=None): + return 
Variable(uri.split("#").pop()) + + def declareExistential(self, x): + self.existentials[x] = self.newBlankNode() + + def close(self): + + return self.quotedgraph + + +r_hibyte = re.compile(r"([\x80-\xff])") + + +class RDFSink(object): + def __init__(self, graph: Graph): + self.rootFormula: Optional[Formula] = None + self.uuid = uuid4().hex + self.counter = 0 + self.graph = graph + + def newFormula(self) -> Formula: + fa = getattr(self.graph.store, "formula_aware", False) + if not fa: + raise ParserError( + "Cannot create formula parser with non-formula-aware store." + ) + f = Formula(self.graph) + return f + + def newGraph(self, identifier: Identifier) -> Graph: + return Graph(self.graph.store, identifier) + + def newSymbol(self, *args: str): + return URIRef(args[0]) + + def newBlankNode( + self, + arg: Optional[Union[Formula, Graph, Any]] = None, + uri: Optional[str] = None, + why: Optional[Callable[[], None]] = None, + ) -> BNode: + if isinstance(arg, Formula): + return arg.newBlankNode(uri) + elif isinstance(arg, Graph) or arg is None: + self.counter += 1 + bn = BNode("n%sb%s" % (self.uuid, self.counter)) + else: + bn = BNode(str(arg[0]).split("#").pop().replace("_", "b")) + return bn + + def newRdfstarTriple( + self, + # hashvalue: Optional[str], + # arg: Optional[Union[Formula, Graph, Any]] = None, + # uri: Optional[str] = None, + arg: Optional[Union[Formula, Graph, Any]] = None, + uri: Optional[str] = None, + why: Optional[Callable[[], None]] = None, + hashvalue: Optional[str] = None + ) -> RdfstarTriple: + if isinstance(arg, Formula): + return arg.newRdfstarTriple(hashvalue = hashvalue) + elif isinstance(arg, Graph) or arg is None: + # self.counter += 1 + rdfstartriple = RdfstarTriple(hashvalue =hashvalue) + else: + rdfstartriple = RdfstarTriple(hashvalue =hashvalue) + return rdfstartriple + + def newLiteral(self, s: str, dt: Optional[URIRef], lang: Optional[str]) -> Literal: + if dt: + return Literal(s, datatype=dt) + else: + return Literal(s, lang=lang) + 
+ def newList(self, n: typing.List[Any], f: Optional[Formula]): + nil = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") + if not n: + return nil + + first = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#first") + rest = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest") + af = a = self.newBlankNode(f) + + for ne in n[:-1]: + self.makeStatement((f, first, a, ne)) + an = self.newBlankNode(f) + self.makeStatement((f, rest, a, an)) + a = an + self.makeStatement((f, first, a, n[-1])) + self.makeStatement((f, rest, a, nil)) + return af + + def newSet(self, *args): + return set(args) + + def setDefaultNamespace(self, *args) -> str: + return ":".join(repr(n) for n in args) + + def makeStatement(self, quadruple, why=None) -> None: + f, p, s, o = quadruple + + if hasattr(p, "formula"): + raise ParserError("Formula used as predicate") + + s = self.normalise(f, s) + p = self.normalise(f, p) + o = self.normalise(f, o) + if f == self.rootFormula: + # print s, p, o, '.' + self.graph.add((s, p, o)) + elif isinstance(f, Formula): + f.quotedgraph.add((s, p, o)) + else: + f.add((s, p, o)) + + # return str(quadruple) + + def makerdfstarStatement(self, quadruple, why=None) -> None: + f, hashnode, p, s, o = quadruple + + if hasattr(p, "formula"): + raise ParserError("Formula used as predicate") + + s = self.normalise(f, s) + p = self.normalise(f, p) + o = self.normalise(f, o) + if f == self.rootFormula: + # print s, p, o, '.' 
+ self.graph.addStarTriple((hashnode, s, p, o)) + elif isinstance(f, Formula): + f.quotedgraph.addStarTriple((hashnode, s, p, o)) + else: + f.addStarTriple((hashnode, s, p, o)) + + # return str(quadruple) + + def normalise(self, f: Optional[Formula], n): + if isinstance(n, tuple): + return URIRef(str(n[1])) + + if isinstance(n, bool): + s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE) + return s + + if isinstance(n, int) or isinstance(n, long_type): + s = Literal(str(n), datatype=INTEGER_DATATYPE) + return s + + if isinstance(n, Decimal): + value = str(n) + if value == "-0": + value = "0" + s = Literal(value, datatype=DECIMAL_DATATYPE) + return s + + if isinstance(n, float): + s = Literal(str(n), datatype=DOUBLE_DATATYPE) + return s + + if isinstance(f, Formula): + if n in f.existentials: + return f.existentials[n] + + # if isinstance(n, Var): + # if f.universals.has_key(n): + # return f.universals[n] + # f.universals[n] = f.newBlankNode() + # return f.universals[n] + + return n + + def intern(self, something: AnyT) -> AnyT: + return something + + def bind(self, pfx, uri): + pass # print pfx, ':', uri + + def startDoc(self, formula: Optional[Formula]): + self.rootFormula = formula + + def endDoc(self, formula: Optional[Formula]) -> None: + pass + + +################################################### +# +# Utilities +# + + +def hexify(ustr): + """Use URL encoding to return an ASCII string + corresponding to the given UTF8 string + >>> hexify("http://example/a b") + b'http://example/a%20b' + """ + # s1=ustr.encode('utf-8') + s = "" + for ch in ustr: # .encode('utf-8'): + if ord(ch) > 126 or ord(ch) < 33: + ch = "%%%02X" % ord(ch) + else: + ch = "%c" % ord(ch) + s = s + ch + return s.encode("latin-1") + + +class TrigParser(Parser): + + """ + An RDFLib parser for Turtle + See http://www.w3.org/TR/turtle/ + """ + + def __init__(self): + pass + + def parse( + self, + source: "InputSource", + graph: Graph, + encoding: Optional[str] = "utf-8", + turtle: bool = True, 
+ ): + if encoding not in [None, "utf-8"]: + raise ParserError( + "N3/Turtle files are always utf-8 encoded, I was passed: %s" % encoding + ) + + sink = RDFSink(graph) + + baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "") + p = SinkParser(sink, baseURI=baseURI, turtle=turtle) + # N3 parser prefers str stream + # stream = source.getCharacterStream() + # if not stream: + # stream = source.getByteStream() + # p.loadStream(stream) + + if hasattr(source, "file"): + f = open(source.file.name, "rb") + rdbytes = f.read() + f.close() + elif hasattr(source, "_InputSource__bytefile"): + if hasattr(source._InputSource__bytefile, "wrapped"): + f = open((source._InputSource__bytefile.wrapped.strip().splitlines())[0], "rb") # what if multiple files + rdbytes = f.read() + f.close() + + bp = rdbytes.decode("utf-8") + ou = RDFstarParsings(bp) + p.feed(ou) + p.endDoc() + for prefix, namespace in p._bindings.items(): + graph.bind(prefix, namespace) diff --git a/rdflib/plugins/parsers/turtlestar.py b/rdflib/plugins/parsers/turtlestar.py new file mode 100644 index 000000000..7501f8bda --- /dev/null +++ b/rdflib/plugins/parsers/turtlestar.py @@ -0,0 +1,2452 @@ +#!/usr/bin/env python +""" +notation3.py - Standalone Notation3 Parser +Derived from CWM, the Closed World Machine +Authors of the original suite: +* Dan Connolly <@@> +* Tim Berners-Lee <@@> +* Yosi Scharf <@@> +* Joseph M. Reagle Jr. +* Rich Salz +http://www.w3.org/2000/10/swap/notation3.py +Copyright 2000-2007, World Wide Web Consortium. +Copyright 2001, MIT. +Copyright 2001, Zolera Systems Inc. +License: W3C Software License +http://www.w3.org/Consortium/Legal/copyright-software +Modified by Sean B. Palmer +Copyright 2007, Sean B. Palmer. +Modified to work with rdflib by Gunnar Aastrand Grimnes +Copyright 2010, Gunnar A. 
Grimnes +""" +import codecs +import os +import re +from smtplib import quotedata +import sys +import rdflib + +# importing typing for `typing.List` because `List`` is used for something else +import typing +from decimal import Decimal +from typing import IO, TYPE_CHECKING, Any, Callable, Dict, Optional, TypeVar, Union +from uuid import uuid4 + +from rdflib.compat import long_type +from rdflib.exceptions import ParserError +from rdflib.graph import ConjunctiveGraph, Graph, QuotedGraph +from rdflib.term import ( + _XSD_PFX, + RdfstarTriple, + BNode, + Identifier, + Literal, + Node, + URIRef, + Variable, + _unique_id, +) + +__all__ = [ + "BadSyntax", + "N3Parser", + "TurtleParser", + "splitFragP", + "join", + "base", + "runNamespace", + "uniqueURI", + "hexify", +] + +from rdflib.parser import Parser + +if TYPE_CHECKING: + from rdflib.parser import InputSource + +AnyT = TypeVar("AnyT") + + +def splitFragP(uriref, punct=0): + """split a URI reference before the fragment + Punctuation is kept. + e.g. + >>> splitFragP("abc#def") + ('abc', '#def') + >>> splitFragP("abcdef") + ('abcdef', '') + """ + + i = uriref.rfind("#") + if i >= 0: + return uriref[:i], uriref[i:] + else: + return uriref, "" + + +def join(here, there): + """join an absolute URI and URI reference + (non-ascii characters are supported/doctested; + haven't checked the details of the IRI spec though) + ``here`` is assumed to be absolute. + ``there`` is URI reference. + >>> join('http://example/x/y/z', '../abc') + 'http://example/x/abc' + Raise ValueError if there uses relative path + syntax but here has no hierarchical path. + >>> join('mid:foo@example', '../foo') # doctest: +NORMALIZE_WHITESPACE + Traceback (most recent call last): + raise ValueError(here) + ValueError: Base has no slash + after colon - with relative '../foo'. 
+ >>> join('http://example/x/y/z', '') + 'http://example/x/y/z' + >>> join('mid:foo@example', '#foo') + 'mid:foo@example#foo' + We grok IRIs + >>> len(u'Andr\\xe9') + 5 + >>> join('http://example.org/', u'#Andr\\xe9') + u'http://example.org/#Andr\\xe9' + """ + + # assert(here.find("#") < 0), \ + # "Base may not contain hash: '%s'" % here # why must caller splitFrag? + + slashl = there.find("/") + colonl = there.find(":") + + # join(base, 'foo:/') -- absolute + if colonl >= 0 and (slashl < 0 or colonl < slashl): + return there + + bcolonl = here.find(":") + assert bcolonl >= 0, ( + "Base uri '%s' is not absolute" % here + ) # else it's not absolute + + path, frag = splitFragP(there) + if not path: + return here + frag + + # join('mid:foo@example', '../foo') bzzt + if here[bcolonl + 1 : bcolonl + 2] != "/": + raise ValueError( + "Base <%s> has no slash after " + "colon - with relative '%s'." % (here, there) + ) + + if here[bcolonl + 1 : bcolonl + 3] == "//": + bpath = here.find("/", bcolonl + 3) + else: + bpath = bcolonl + 1 + + # join('http://xyz', 'foo') + if bpath < 0: + bpath = len(here) + here = here + "/" + + # join('http://xyz/', '//abc') => 'http://abc' + if there[:2] == "//": + return here[: bcolonl + 1] + there + + # join('http://xyz/', '/abc') => 'http://xyz/abc' + if there[:1] == "/": + return here[:bpath] + there + + slashr = here.rfind("/") + + while 1: + if path[:2] == "./": + path = path[2:] + if path == ".": + path = "" + elif path[:3] == "../" or path == "..": + path = path[3:] + i = here.rfind("/", bpath, slashr) + if i >= 0: + here = here[: i + 1] + slashr = i + else: + break + + return here[: slashr + 1] + path + frag + + +def base(): + """The base URI for this process - the Web equiv of cwd + Relative or absolute unix-standard filenames parsed relative to + this yield the URI of the file. 
+ If we had a reliable way of getting a computer name, + we should put it in the hostname just to prevent ambiguity + """ + # return "file://" + hostname + os.getcwd() + "/" + return "file://" + _fixslash(os.getcwd()) + "/" + + +def _fixslash(s): + """Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" + s = s.replace("\\", "/") + if s[0] != "/" and s[1] == ":": + s = s[2:] # @@@ Hack when drive letter present + return s + + +CONTEXT = 0 +PRED = 1 +SUBJ = 2 +OBJ = 3 + +PARTS = PRED, SUBJ, OBJ +ALL4 = CONTEXT, PRED, SUBJ, OBJ + +SYMBOL = 0 +FORMULA = 1 +LITERAL = 2 +LITERAL_DT = 21 +LITERAL_LANG = 22 +ANONYMOUS = 3 +XMLLITERAL = 25 + +Logic_NS = "http://www.w3.org/2000/10/swap/log#" +NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging +forSomeSym = Logic_NS + "forSome" +forAllSym = Logic_NS + "forAll" + +RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" +RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +OWL_NS = "http://www.w3.org/2002/07/owl#" +DAML_sameAs_URI = OWL_NS + "sameAs" +parsesTo_URI = Logic_NS + "parsesTo" +RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/" + +List_NS = RDF_NS_URI # From 20030808 +_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#" + +N3_first = (SYMBOL, List_NS + "first") +N3_rest = (SYMBOL, List_NS + "rest") +N3_li = (SYMBOL, List_NS + "li") +N3_nil = (SYMBOL, List_NS + "nil") +N3_List = (SYMBOL, List_NS + "List") +N3_Empty = (SYMBOL, List_NS + "Empty") + + +runNamespaceValue = None + + +def runNamespace(): + """Returns a URI suitable as a namespace for run-local objects""" + # @@@ include hostname (privacy?) (hash it?) 
+ global runNamespaceValue + if runNamespaceValue is None: + runNamespaceValue = join(base(), _unique_id()) + "#" + return runNamespaceValue + + +nextu = 0 + +import re +import lark +import hashlib +from lark import ( + Lark, + Transformer, + Tree, +) +from lark.visitors import Visitor +from lark.reconstruct import Reconstructor + +from lark.lexer import ( + Token, +) + +# from pymantic.compat import ( +# binary_type, +# ) +# from pymantic.parsers.base import ( +# BaseParser, +# ) +# from pymantic.primitives import ( +# BlankNode, +# Literal, +# NamedNode, +# Triple, +# ) +# from pymantic.util import ( +# grouper, +# smart_urljoin, +# decode_literal, +# ) + +grammar = r"""turtle_doc: statement* +?statement: directive | triples "." | quotedtriples "." +directive: prefix_id | base | sparql_prefix | sparql_base +prefix_id: "@prefix" PNAME_NS IRIREF "." +base: BASE_DIRECTIVE IRIREF "." +sparql_base: /BASE/i IRIREF +sparql_prefix: /PREFIX/i PNAME_NS IRIREF +triples: subject predicate_object_list + | blank_node_property_list predicate_object_list? +insidequotation: qtsubject verb qtobject +quotedtriples: triples compoundanno +predicate_object_list: verb object_list (";" (verb object_list)?)* +?object_list: object ("," object)* +?verb: predicate | /a/ +?subject: iri | blank_node | collection | quotation +?predicate: iri +?object: iri | blank_node | collection | blank_node_property_list | literal | quotation +?literal: rdf_literal | numeric_literal | boolean_literal +?qtsubject: iri | blank_node | quotation +?qtobject: iri | blank_node | literal | quotation +ANGLEBRACKETL: "<<" +ANGLEBRACKETR: ">>" +quotation: ANGLEBRACKETL insidequotation ANGLEBRACKETR +COMPOUNDL: "{|" +COMPOUNDR: "|}" +compoundanno: COMPOUNDL predicate_object_list COMPOUNDR +blank_node_property_list: "[" predicate_object_list "]" +collection: "(" object* ")" +numeric_literal: INTEGER | DECIMAL | DOUBLE +rdf_literal: string (LANGTAG | "^^" iri)? 
+boolean_literal: /true|false/ +string: STRING_LITERAL_QUOTE + | STRING_LITERAL_SINGLE_QUOTE + | STRING_LITERAL_LONG_SINGLE_QUOTE + | STRING_LITERAL_LONG_QUOTE +iri: IRIREF | prefixed_name +prefixed_name: PNAME_LN | PNAME_NS +blank_node: BLANK_NODE_LABEL | ANON + +BASE_DIRECTIVE: "@base" +IRIREF: "<" (/[^\x00-\x20<>"{}|^`\\]/ | UCHAR)* ">" +PNAME_NS: PN_PREFIX? ":" +PNAME_LN: PNAME_NS PN_LOCAL +BLANK_NODE_LABEL: "_:" (PN_CHARS_U | /[0-9]/) ((PN_CHARS | ".")* PN_CHARS)? +LANGTAG: "@" /[a-zA-Z]+/ ("-" /[a-zA-Z0-9]+/)* +INTEGER: /[+-]?[0-9]+/ +DECIMAL: /[+-]?[0-9]*/ "." /[0-9]+/ +DOUBLE: /[+-]?/ (/[0-9]+/ "." /[0-9]*/ EXPONENT + | "." /[0-9]+/ EXPONENT | /[0-9]+/ EXPONENT) +EXPONENT: /[eE][+-]?[0-9]+/ +STRING_LITERAL_QUOTE: "\"" (/[^\x22\x5C\x0A\x0D]/ | ECHAR | UCHAR)* "\"" +STRING_LITERAL_SINGLE_QUOTE: "'" (/[^\x27\x5C\x0A\x0D]/ | ECHAR | UCHAR)* "'" +STRING_LITERAL_LONG_SINGLE_QUOTE: "'''" (/'|''/? (/[^'\\]/ | ECHAR | UCHAR))* "'''" +STRING_LITERAL_LONG_QUOTE: "\"\"\"" (/"|""/? (/[^"\\]/ | ECHAR | UCHAR))* "\"\"\"" +UCHAR: "\\u" HEX~4 | "\\U" HEX~8 +ECHAR: "\\" /[tbnrf"'\\]/ +WS: /[\x20\x09\x0D\x0A]/ +ANON: "[" WS* "]" +PN_CHARS_BASE: /[A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF]/ +PN_CHARS_U: PN_CHARS_BASE | "_" +PN_CHARS: PN_CHARS_U | /[\-0-9\u00B7\u0300-\u036F\u203F-\u2040]/ +PN_PREFIX: PN_CHARS_BASE ((PN_CHARS | ".")* PN_CHARS)? +PN_LOCAL: (PN_CHARS_U | ":" | /[0-9]/ | PLX) ((PN_CHARS | "." | ":" | PLX)* (PN_CHARS | ":" | PLX))? 
+PLX: PERCENT | PN_LOCAL_ESC +PERCENT: "%" HEX~2 +HEX: /[0-9A-Fa-f]/ +PN_LOCAL_ESC: "\\" /[_~\.\-!$&'()*+,;=\/?#@%]/ + +%ignore WS +COMMENT: "#" /[^\n]/* +%ignore COMMENT +""" + +turtle_lark = Lark(grammar, start="turtle_doc", parser="lalr", maybe_placeholders=False) + +from lark import Visitor, v_args +quotation_list = [] +quotation_dict = dict() +vblist = [] +quotationreif = [] +prefix_list = [] +quotationannolist = [] +constructors = "" +assertedtriplelist = [] +quoted_or_not = False +both_quoted_and_asserted = False + +def myHash(text:str): + return str(hashlib.md5(text.encode('utf-8')).hexdigest()) + +class FindVariables(Visitor): + def __init__(self): + super().__init__() + # self.quotation_list = [] + self.variable_list = [] + + def quotation(self, var): + qut = Reconstructor(turtle_lark).reconstruct(var) + qut = qut.replace(";", "") + qut = qut.replace(" ", "") + if not (qut in quotation_list): + quotation_list.append(qut) + + vr = Reconstructor(turtle_lark).reconstruct(var) + vr = vr.replace(";","") + + quotation_dict[qut] = str(myHash(qut)) + "RdfstarTriple" + qut_hash = ":" + str(myHash(qut)) + # try: + id = quotation_dict.get(vr) + for x in quotation_dict: + if x in vr: + vr = vr.replace(x, ":"+quotation_dict.get(x)) + vr = vr.replace("<<", "") + vr = vr.replace(">>", "") + output = vr.split(":") + output.pop(0) + oa1 = Reconstructor(turtle_lark).reconstruct(var) + oa1 = oa1.replace(";","") + # oa1 = oa1.replace(" ","") + output.append(oa1) + # print(quotationreif) + if (not (output in quotationreif)): + quotationreif.append(output) + + def blank_node_property_list(self, var): + object_list = ((var.children[0]).children)[1].children + + for x in range(0, len(object_list)): + try: + if object_list[x].data == 'quotation': + collection_quotation_reconstruct = Reconstructor(turtle_lark).reconstruct(object_list[x]) + collection_quotation_reconstruct = collection_quotation_reconstruct.replace(";","") + t2 = quotation_dict[collection_quotation_reconstruct] + 
hasht2 = "_:" + t2 + object_list[x] = Tree('iri', [Tree('prefixed_name', [Token('PNAME_LN', hasht2)])]) + + except Exception as ex: + object_list = ((var.children[0]).children)[1] + collection_quotation_reconstruct = Reconstructor(turtle_lark).reconstruct(object_list) + collection_quotation_reconstruct = collection_quotation_reconstruct.replace(";","") + try: + t2 = quotation_dict[collection_quotation_reconstruct] + hasht2 = "_:" + t2 + ((var.children[0]).children)[1] = Tree('iri', [Tree('prefixed_name', [Token('PNAME_LN', hasht2)])]) + break + except Exception as ex2: + pass + + def collection(self, var): + for x in range(0, len(var.children)): + if var.children[x].data == 'quotation': + collection_quotation_reconstruct = Reconstructor(turtle_lark).reconstruct(var.children[x]) + collection_quotation_reconstruct = collection_quotation_reconstruct.replace(";","") + t2 = quotation_dict[collection_quotation_reconstruct] + hasht2 = "_:" + t2 + var.children[x] = Tree('iri', [Tree('prefixed_name', [Token('PNAME_LN', hasht2)])]) + + def quotedtriples(self, var): + triple1 = None + subjecthash = "" + + for x in var.children: + if x.data == "triples": + triple1 = Reconstructor(turtle_lark).reconstruct(x) + triple1 = triple1.replace(";","") + + triple1 = "<<"+triple1+">>" + subjecthash = "_:" + str(myHash(triple1)) + "RdfstarTriple" + if not (triple1 in quotation_list): + quotation_list.append(triple1) + + quotation_dict[triple1] = str(myHash(triple1)) + "RdfstarTriple" + elif x.data == "compoundanno": + for y in x.children: + if (y != "{|") & (y!= "|}"): + count2 = 0 + quotationtriple = [] + for z in y.children: + count2+=1 + z2 = Reconstructor(turtle_lark).reconstruct(z) + quotationtriple.append(z2) + if count2 ==2: + quotationtriple.insert(0, subjecthash) + quotationannolist.append(quotationtriple) + count2 = 0 + quotationtriple = [] + + def triples(self, var): + + appends1 = [] + tri = Reconstructor(turtle_lark).reconstruct(var) + if ("[" in tri) and (not "RdfstarTriple" 
in tri) and (not "<<" in tri): + vblist.append([tri]) + else: + tri = tri.replace(";", "") + if not (tri in assertedtriplelist): + assertedtriplelist.append(tri) + for x in var.children: + if x.data == 'predicate_object_list': + xc = x.children + for y in xc: + try: + x2 = Reconstructor(turtle_lark).reconstruct(y) + except: + appends1.pop(0) + appends1.append("standard reification") + appends1.append(Reconstructor(turtle_lark).reconstruct(var)) + appends1.append(" . \n") + break + x2 = x2.replace(";","") + appends1.append(x2) + else: + anyquotationin = False + x1 = Reconstructor(turtle_lark).reconstruct(x) + appends1.append(x1) + + if not (appends1 in vblist): + vblist.append(appends1) + + def insidequotation(self, var): + appends1 = [] + for x in var.children: + x1 = Reconstructor(turtle_lark).reconstruct(x) + x1 = x1.replace(";","") + appends1.append(x1) + + if not (appends1 in vblist): + vblist.append(appends1) + + def prefix_id(self, children): + pass + + def sparql_prefix(self, children): + prefix_list.append(children) + + def base(self, children): + base_directive, base_iriref = children + if base_directive.startswith('@') and base_directive != '@base': + raise ValueError('Unexpected @base: ' + base_directive) + +def RDFstarParsings(rdfstarstring): + global quotationannolist, vblist, quotation_dict, quotationreif, prefix_list, constructors, assertedtriplelist, quoted_or_not, both_quoted_and_asserted + quotationannolist = [] + vblist = [] + quotationreif = [] + prefix_list = [] + constructors = "" + quoted_or_not = False + both_quoted_and_asserted = False + tree = turtle_lark.parse(rdfstarstring) + at = FindVariables().visit(tree) + for y in vblist: + for element_index in range(0, len(y)): + if (y[element_index][0] == "_") & (not (element_index == 0)): + y[element_index]=" "+y[element_index] + result = "".join(y) + if "standard reification" in result: + result = result.replace("standard reification", "") + constructors+=result + else: + result = 
result.replace(" ", "") + if result in assertedtriplelist: + test1 = "<<"+result+">>" + if test1 in quotation_list: + both_quoted_and_asserted = True + else: + both_quoted_and_asserted = False + quoted_or_not = False + else: + test2 = "<<"+result+">>" + if test2 in quotation_list: + both_quoted_and_asserted = False + quoted_or_not = True + else: + both_quoted_and_asserted = False + quoted_or_not = False + result = "<<"+result+">>" + if not (result in quotation_list): + for z in range(0,len(y)): + if "<<" in y[z]: + y[z] = y[z].replace(" ", "") + y[z] = "_:"+quotation_dict[y[z]] + myvalue = str(myHash(result)) + try: + subject = y[0] + predicate = y[1] + object = y[2] + except: + if len(y)==1: + result2 = y[0] + constructors+=result2 + constructors = constructors +".\n" + continue + if both_quoted_and_asserted: + next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + if quoted_or_not: + next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + constructors+=next_rdf_object + else: + value = quotation_dict[result] + for z in range(0,len(y)): + if "<<" in y[z]: + y[z] = "_:"+quotation_dict[y[z]] + subject = y[0] + predicate = y[1] + object = y[2] + if both_quoted_and_asserted: + next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + if quoted_or_not: + next_rdf_object = "_:" + str(value) + 
'\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + constructors+=next_rdf_object + + for z in quotationannolist: + result1 = "".join(z) + result1 = "<<"+result1+">>" + if result1 in quotation_list: + both_quoted_and_asserted = True + else: + both_quoted_and_asserted = False + quoted_or_not = False + value = str(myHash(result1)) + subject = z[0] + predicate = z[1] + object = z[2] + if both_quoted_and_asserted: + next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + if quoted_or_not: + next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + constructors+=next_rdf_object + + for x in range(0, len(prefix_list)): + prefix_list[x] = Reconstructor(turtle_lark).reconstruct(prefix_list[x]) + constructors = prefix_list[x]+"\n"+constructors + + if ((not ("PREFIX rdfstar: " in constructors)) and (not("PREFIX rdfstar:" in constructors))): + constructors = "PREFIX rdfstar: \n"+constructors + + constructors = "PREFIX rdf: \n"+constructors + + if not (("PREFIX : " in constructors) or ("PREFIX:" in constructors)): + constructors = "PREFIX : \n"+constructors + + if "PREFIX:" in constructors: + constructors = constructors.replace("PREFIX:", "PREFIX :") + + print("input after preprocessing: 
", constructors) + constructors = bytes(constructors, 'utf-8') + return constructors + +def uniqueURI(): + """A unique URI""" + global nextu + nextu += 1 + return runNamespace() + "u_" + str(nextu) + +tracking = False +chatty_flag = 50 + +# from why import BecauseOfData, becauseSubexpression + +def BecauseOfData(*args, **kargs): + # print args, kargs + pass + + +def becauseSubexpression(*args, **kargs): + # print args, kargs + pass + + +N3_forSome_URI = forSomeSym +N3_forAll_URI = forAllSym + +# Magic resources we know about + +ADDED_HASH = "#" # Stop where we use this in case we want to remove it! +# This is the hash on namespace URIs + +RDF_type = (SYMBOL, RDF_type_URI) +DAML_sameAs = (SYMBOL, DAML_sameAs_URI) + +LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies" + +BOOLEAN_DATATYPE = _XSD_PFX + "boolean" +DECIMAL_DATATYPE = _XSD_PFX + "decimal" +DOUBLE_DATATYPE = _XSD_PFX + "double" +FLOAT_DATATYPE = _XSD_PFX + "float" +INTEGER_DATATYPE = _XSD_PFX + "integer" + +option_noregen = 0 # If set, do not regenerate genids on output + +# @@ I18n - the notname chars need extending for well known unicode non-text +# characters. The XML spec switched to assuming unknown things were name +# characters. 
+# _namechars = string.lowercase + string.uppercase + string.digits + '_-' +_notQNameChars = set("\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~") # else valid qname :-/ +_notKeywordsChars = _notQNameChars | {"."} +_notNameChars = _notQNameChars | {":"} # Assume anything else valid name :-/ +_rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + +hexChars = set("ABCDEFabcdef0123456789") +escapeChars = set("(_~.-!$&'()*+,;=/?#@%)") # valid for \ escapes in localnames +numberChars = set("0123456789-") +numberCharsPlus = numberChars | {"+", "."} + + +def unicodeExpand(m): + try: + return chr(int(m.group(1), 16)) + except: + raise Exception("Invalid unicode code point: " + m.group(1)) + + +unicodeEscape4 = re.compile(r"\\u([0-9a-fA-F]{4})") +unicodeEscape8 = re.compile(r"\\U([0-9a-fA-F]{8})") + + +N3CommentCharacter = "#" # For unix script # ! compatibility + +# Parse string to sink +# +# Regular expressions: +eol = re.compile(r"[ \t]*(#[^\n]*)?\r?\n") # end of line, poss. w/comment +eof = re.compile(r"[ \t]*(#[^\n]*)?$") # end of file, poss. 
w/comment +ws = re.compile(r"[ \t]*") # Whitespace not including NL +signed_integer = re.compile(r"[-+]?[0-9]+") # integer +integer_syntax = re.compile(r"[-+]?[0-9]+") +decimal_syntax = re.compile(r"[-+]?[0-9]*\.[0-9]+") +exponent_syntax = re.compile( + r"[-+]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)(?:e|E)[-+]?[0-9]+" +) +digitstring = re.compile(r"[0-9]+") # Unsigned integer +interesting = re.compile(r"""[\\\r\n\"\']""") +langcode = re.compile(r"[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*") + +quoted_triple_list = [] +class SinkParser: + def __init__( + self, + store: "RDFSink", + openFormula: Optional["Formula"] = None, + thisDoc: str = "", + baseURI: Optional[str] = None, + genPrefix: str = "", + why: Optional[Callable[[], None]] = None, + turtle: bool = False, + ): + """note: namespace names should *not* end in # ; + the # will get added during qname processing""" + + self._bindings = {} + if thisDoc != "": + assert ":" in thisDoc, "Document URI not absolute: <%s>" % thisDoc + self._bindings[""] = thisDoc + "#" # default + + self._store = store + if genPrefix: + # TODO FIXME: there is no function named setGenPrefix + store.setGenPrefix(genPrefix) # type: ignore[attr-defined] # pass it on + + self._thisDoc = thisDoc + self.lines = 0 # for error handling + self.startOfLine = 0 # For calculating character number + self._genPrefix = genPrefix + self.keywords = ["a", "this", "bind", "has", "is", "of", "true", "false"] + self.keywordsSet = 0 # Then only can others be considered qnames + self._anonymousNodes: Dict[str, Node] = {} + self._rdfstartripleNodes: Dict[str, Node] = {} + # Dict of anon nodes already declared ln: Term + self._variables: Dict[Identifier, Identifier] = {} + self._parentVariables: Dict[Identifier, Identifier] = {} + self._reason = why # Why the parser was asked to parse this + + self.turtle = turtle # raise exception when encountering N3 extensions + # Turtle allows single or double quotes around strings, whereas N3 + # only allows double quotes. 
+ self.string_delimiters = ('"', "'") if turtle else ('"',) + + self._reason2 = None # Why these triples + # was: diag.tracking + if tracking: + self._reason2 = BecauseOfData( + store.newSymbol(thisDoc), because=self._reason + ) + + self._baseURI: Optional[str] + if baseURI: + self._baseURI = baseURI + else: + if thisDoc: + self._baseURI = thisDoc + else: + self._baseURI = None + + assert not self._baseURI or ":" in self._baseURI + + if not self._genPrefix: + if self._thisDoc: + self._genPrefix = self._thisDoc + "#_g" + else: + self._genPrefix = uniqueURI() + + self._formula: Formula + if openFormula is None and not turtle: + if self._thisDoc: + # TODO FIXME: store.newFormula does not take any arguments + self._formula = store.newFormula(thisDoc + "#_formula") # type: ignore[call-arg] + else: + self._formula = store.newFormula() + else: + self._formula = openFormula # type: ignore[assignment] + + self._context = self._formula + self._parentContext: Optional[Formula] = None + + def here(self, i: int) -> str: + """String generated from position in file + This is for repeatability when referring people to bnodes in a document. + This has diagnostic uses less formally, as it should point one to which + bnode the arbitrary identifier actually is. It gives the + line and character number of the '[' charcacter or path character + which introduced the blank node. The first blank node is boringly + _L1C1. 
It used to be used only for tracking, but for tests in general + it makes the canonical ordering of bnodes repeatable.""" + + return "%s_L%iC%i" % (self._genPrefix, self.lines, i - self.startOfLine + 1) + + def formula(self): + return self._formula + + def loadStream(self, stream: Union[IO[str], IO[bytes]]) -> Optional["Formula"]: + return self.loadBuf(stream.read()) # Not ideal + + def loadBuf(self, buf: Union[str, bytes]): + """Parses a buffer and returns its top level formula""" + self.startDoc() + + self.feed(buf) + return self.endDoc() # self._formula + + def feed(self, octets: Union[str, bytes]): + """Feed an octet stream to the parser + if BadSyntax is raised, the string + passed in the exception object is the + remainder after any statements have been parsed. + So if there is more data to feed to the + parser, it should be straightforward to recover.""" + + if not isinstance(octets, str): + s = octets.decode("utf-8") + # NB already decoded, so \ufeff + if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode("utf-8"): + s = s[1:] + else: + s = octets + + i = 0 + while i >= 0: + j = self.skipSpace(s, i) + if j < 0: + return + i = self.directiveOrStatement(s, j) + if i < 0: + # print("# next char: %s" % s) + self.BadSyntax(s, j, "expected directive or statement") + + def directiveOrStatement(self, argstr: str, h: int) -> int: + + i = self.skipSpace(argstr, h) + if i < 0: + return i # EOF + + if self.turtle: + j = self.sparqlDirective(argstr, i) + if j >= 0: + return j + + j = self.directive(argstr, i) + if j >= 0: + return self.checkDot(argstr, j) + j = self.statement(argstr, i) + if j >= 0: + return self.checkDot(argstr, j) + + return j + + # @@I18N + # _namechars = string.lowercase + string.uppercase + string.digits + '_-' + + def tok(self, tok: str, argstr: str, i: int, colon: bool = False): + """Check for keyword. Space must have been stripped on entry and + we must not be at end of file. 
+ if colon, then keyword followed by colon is ok + (@prefix: is ok, rdf:type shortcut a must be followed by ws) + """ + + assert tok[0] not in _notNameChars # not for punctuation + if argstr[i] == "@": + i += 1 + else: + if tok not in self.keywords: + return -1 # No, this has neither keywords declaration nor "@" + + i_plus_len_tok = i + len(tok) + if ( + argstr[i:i_plus_len_tok] == tok + and (argstr[i_plus_len_tok] in _notKeywordsChars) + or (colon and argstr[i_plus_len_tok] == ":") + ): + return i_plus_len_tok + else: + return -1 + + def sparqlTok(self, tok: str, argstr: str, i: int) -> int: + """Check for SPARQL keyword. Space must have been stripped on entry + and we must not be at end of file. + Case insensitive and not preceded by @ + """ + + assert tok[0] not in _notNameChars # not for punctuation + + len_tok = len(tok) + if argstr[i : i + len_tok].lower() == tok.lower() and ( + argstr[i + len_tok] in _notQNameChars + ): + i += len_tok + return i + else: + return -1 + + def directive(self, argstr: str, i: int) -> int: + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + res: typing.List[Any] = [] + + j = self.tok("bind", argstr, i) # implied "#". Obsolete. 
+ if j > 0: + self.BadSyntax(argstr, i, "keyword bind is obsolete: use @prefix") + + j = self.tok("keywords", argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'keywords' when in Turtle mode.") + + i = self.commaSeparatedList(argstr, j, res, self.bareWord) + if i < 0: + self.BadSyntax( + argstr, i, "'@keywords' needs comma separated list of words" + ) + self.setKeywords(res[:]) + return i + + j = self.tok("forAll", argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'forAll' when in Turtle mode.") + + i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) + if i < 0: + self.BadSyntax(argstr, i, "Bad variable list after @forAll") + for x in res: + # self._context.declareUniversal(x) + if x not in self._variables or x in self._parentVariables: + self._variables[x] = self._context.newUniversal(x) + return i + + j = self.tok("forSome", argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'forSome' when in Turtle mode.") + + i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) + if i < 0: + self.BadSyntax(argstr, i, "Bad variable list after @forSome") + for x in res: + self._context.declareExistential(x) + return i + + j = self.tok("prefix", argstr, i, colon=True) # no implied "#" + if j >= 0: + t: typing.List[Any] = [] + i = self.qname(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, "expected qname after @prefix") + j = self.uri_ref2(argstr, i, t) + if j < 0: + self.BadSyntax(argstr, i, "expected after @prefix _qname_") + ns = self.uriOf(t[1]) + + if self._baseURI: + ns = join(self._baseURI, ns) + elif ":" not in ns: + self.BadSyntax( + argstr, + j, + f"With no base URI, cannot use relative URI in @prefix <{ns}>", + ) + assert ":" in ns # must be absolute + self._bindings[t[0][0]] = ns + self.bind(t[0][0], hexify(ns)) + return j + + j = self.tok("base", argstr, i) # Added 2007/7/7 + if j >= 0: + t = [] + i = self.uri_ref2(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, 
"expected after @base ") + ns = self.uriOf(t[0]) + + if self._baseURI: + ns = join(self._baseURI, ns) + else: + self.BadSyntax( + argstr, + j, + "With no previous base URI, cannot use " + + "relative URI in @base <" + + ns + + ">", + ) + assert ":" in ns # must be absolute + self._baseURI = ns + return i + + return -1 # Not a directive, could be something else. + + def sparqlDirective(self, argstr: str, i: int): + + """ + turtle and trig support BASE/PREFIX without @ and without + terminating . + """ + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + + j = self.sparqlTok("PREFIX", argstr, i) + if j >= 0: + t: typing.List[Any] = [] + i = self.qname(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, "expected qname after @prefix") + j = self.uri_ref2(argstr, i, t) + if j < 0: + self.BadSyntax(argstr, i, "expected after @prefix _qname_") + ns = self.uriOf(t[1]) + + if self._baseURI: + ns = join(self._baseURI, ns) + elif ":" not in ns: + self.BadSyntax( + argstr, + j, + "With no base URI, cannot use " + + "relative URI in @prefix <" + + ns + + ">", + ) + assert ":" in ns # must be absolute + self._bindings[t[0][0]] = ns + self.bind(t[0][0], hexify(ns)) + return j + + j = self.sparqlTok("BASE", argstr, i) + if j >= 0: + t = [] + i = self.uri_ref2(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, "expected after @base ") + ns = self.uriOf(t[0]) + + if self._baseURI: + ns = join(self._baseURI, ns) + else: + self.BadSyntax( + argstr, + j, + "With no previous base URI, cannot use " + + "relative URI in @base <" + + ns + + ">", + ) + assert ":" in ns # must be absolute + self._baseURI = ns + return i + + return -1 # Not a directive, could be something else. 
+ + def bind(self, qn: str, uri: bytes) -> None: + assert isinstance(uri, bytes), "Any unicode must be %x-encoded already" + if qn == "": + self._store.setDefaultNamespace(uri) + else: + self._store.bind(qn, uri) + + def setKeywords(self, k: Optional[typing.List[str]]): + """Takes a list of strings""" + if k is None: + self.keywordsSet = 0 + else: + self.keywords = k + self.keywordsSet = 1 + + def startDoc(self) -> None: + # was: self._store.startDoc() + self._store.startDoc(self._formula) + + def endDoc(self) -> Optional["Formula"]: + """Signal end of document and stop parsing. returns formula""" + self._store.endDoc(self._formula) # don't canonicalize yet + return self._formula + + def makeStatement(self, quadruple): + # $$$$$$$$$$$$$$$$$$$$$ + # print "# Parser output: ", `quadruple` + self._store.makeStatement(quadruple, why=self._reason2) + + def makerdfstarStatement(self, quadruple): + # $$$$$$$$$$$$$$$$$$$$$ + # print "# Parser output: ", `quadruple` + self._store.makerdfstarStatement(quadruple, why=self._reason2) + + def statement(self, argstr: str, i: int) -> int: + r: typing.List[Any] = [] + i = self.object(argstr, i, r) # Allow literal for subject - extends RDF + if i < 0: + return i + + j = self.property_list(argstr, i, r[0]) + + if j < 0: + self.BadSyntax(argstr, i, "expected propertylist") + return j + + def subject(self, argstr: str, i: int, res: typing.List[Any]) -> int: + return self.item(argstr, i, res) + + def verb(self, argstr: str, i: int, res: typing.List[Any]) -> int: + """has _prop_ + is _prop_ of + a + = + _prop_ + >- prop -> + <- prop -< + _operator_""" + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + + r: typing.List[Any] = [] + + j = self.tok("has", argstr, i) + if j >= 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'has' keyword in Turtle mode") + + i = self.prop(argstr, j, r) + if i < 0: + self.BadSyntax(argstr, j, "expected property after 'has'") + res.append(("->", r[0])) + return i + + j = self.tok("is", 
argstr, i) + if j >= 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'is' keyword in Turtle mode") + + i = self.prop(argstr, j, r) + if i < 0: + self.BadSyntax(argstr, j, "expected after 'is'") + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax( + argstr, i, "End of file found, expected property after 'is'" + ) + i = j + j = self.tok("of", argstr, i) + if j < 0: + self.BadSyntax(argstr, i, "expected 'of' after 'is' ") + res.append(("<-", r[0])) + return j + + j = self.tok("a", argstr, i) + if j >= 0: + res.append(("->", RDF_type)) + return j + + if argstr[i : i + 2] == "<=": + if self.turtle: + self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. ") + + res.append(("<-", self._store.newSymbol(Logic_NS + "implies"))) + return i + 2 + + if argstr[i] == "=": + if self.turtle: + self.BadSyntax(argstr, i, "Found '=' in Turtle mode") + if argstr[i + 1] == ">": + res.append(("->", self._store.newSymbol(Logic_NS + "implies"))) + return i + 2 + res.append(("->", DAML_sameAs)) + return i + 1 + + if argstr[i : i + 2] == ":=": + if self.turtle: + self.BadSyntax(argstr, i, "Found ':=' in Turtle mode") + + # patch file relates two formulae, uses this @@ really? + res.append(("->", Logic_NS + "becomes")) + return i + 2 + + j = self.prop(argstr, i, r) + if j >= 0: + res.append(("->", r[0])) + return j + + if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-": + self.BadSyntax(argstr, j, ">- ... -> syntax is obsolete.") + + return -1 + + def prop(self, argstr: str, i: int, res): + return self.item(argstr, i, res) + + def item(self, argstr: str, i, res): + return self.path(argstr, i, res) + + def blankNode(self, uri=None): + return self._store.newBlankNode(self._context, uri, why=self._reason2) + + def path(self, argstr: str, i: int, res): + """Parse the path production.""" + j = self.nodeOrLiteral(argstr, i, res) + if j < 0: + return j # nope + + while argstr[j] in {"!", "^"}: # no spaces, must follow exactly (?) 
+ ch = argstr[j] + subj = res.pop() + obj = self.blankNode(uri=self.here(j)) + j = self.node(argstr, j + 1, res) + if j < 0: + self.BadSyntax(argstr, j, "EOF found in middle of path syntax") + pred = res.pop() + if ch == "^": # Reverse traverse + self.makeStatement((self._context, pred, obj, subj)) + else: + self.makeStatement((self._context, pred, subj, obj)) + res.append(obj) + return j + + def anonymousNode(self, ln: str): + """Remember or generate a term for one of these _: anonymous nodes""" + if ("RdfstarTriple" in ln): + term = self._rdfstartripleNodes.get(ln, None) + if term is not None: + return term + term = self._store.newRdfstarTriple(self._context, why=self._reason2, hashvalue = ln) + self._rdfstartripleNodes[ln] = term + return term + term = self._anonymousNodes.get(ln, None) + if term is not None: + return term + term = self._store.newBlankNode(self._context, why=self._reason2) + self._anonymousNodes[ln] = term + return term + + def node(self, argstr: str, i: int, res, subjectAlready=None): + """Parse the production. + Space is now skipped once at the beginning + instead of in multiple calls to self.skipSpace(). + """ + subj = subjectAlready + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + i = j + ch = argstr[i] # Quick 1-character checks first: + + if ch == "[": + bnodeID = self.here(i) + j = self.skipSpace(argstr, i + 1) + if j < 0: + self.BadSyntax(argstr, i, "EOF after '['") + # Hack for "is" binding name to anon node + if argstr[j] == "=": + if self.turtle: + self.BadSyntax( + argstr, j, "Found '[=' or '[ =' when in turtle mode." 
+ ) + i = j + 1 + objs: typing.List[Any] = [] + j = self.objectList(argstr, i, objs) + if j >= 0: + subj = objs[0] + if len(objs) > 1: + for obj in objs: + self.makeStatement((self._context, DAML_sameAs, subj, obj)) + j = self.skipSpace(argstr, j) + if j < 0: + self.BadSyntax( + argstr, i, "EOF when objectList expected after [ = " + ) + if argstr[j] == ";": + j += 1 + else: + self.BadSyntax(argstr, i, "objectList expected after [= ") + + if subj is None: + subj = self.blankNode(uri=bnodeID) + i = self.property_list(argstr, j, subj) + if i < 0: + self.BadSyntax(argstr, j, "property_list expected") + + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax( + argstr, i, "EOF when ']' expected after [ " + ) + if argstr[j] != "]": + self.BadSyntax(argstr, j, "']' expected") + res.append(subj) + return j + 1 + + if not self.turtle and ch == "{": + # if self.turtle: + # self.BadSyntax(argstr, i, + # "found '{' while in Turtle mode, Formulas not supported!") + ch2 = argstr[i + 1] + if ch2 == "$": + # a set + i += 1 + j = i + 1 + List = [] + first_run = True + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "needed '$}', found end.") + if argstr[i : i + 2] == "$}": + j = i + 2 + break + + if not first_run: + if argstr[i] == ",": + i += 1 + else: + self.BadSyntax(argstr, i, "expected: ','") + else: + first_run = False + + item: typing.List[Any] = [] + j = self.item(argstr, i, item) # @@@@@ should be path, was object + if j < 0: + self.BadSyntax(argstr, i, "expected item in set or '$}'") + List.append(self._store.intern(item[0])) + res.append(self._store.newSet(List, self._context)) + return j + else: + # parse a formula + j = i + 1 + oldParentContext = self._parentContext + self._parentContext = self._context + parentAnonymousNodes = self._anonymousNodes + grandParentVariables = self._parentVariables + self._parentVariables = self._variables + self._anonymousNodes = {} + self._variables = self._variables.copy() + reason2 = self._reason2 
+ self._reason2 = becauseSubexpression + if subj is None: + subj = self._store.newFormula() + self._context = subj + + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "needed '}', found end.") + + if argstr[i] == "}": + j = i + 1 + break + j = self.directiveOrStatement(argstr, i) + if j < 0: + self.BadSyntax(argstr, i, "expected statement or '}'") + + self._anonymousNodes = parentAnonymousNodes + self._variables = self._parentVariables + self._parentVariables = grandParentVariables + self._context = self._parentContext + self._reason2 = reason2 + self._parentContext = oldParentContext + res.append(subj.close()) # No use until closed + return j + + if ch == "(": + thing_type = self._store.newList + ch2 = argstr[i + 1] + if ch2 == "$": + thing_type = self._store.newSet + i += 1 + j = i + 1 + + List = [] + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "needed ')', found end.") + if argstr[i] == ")": + j = i + 1 + break + + item = [] + j = self.item(argstr, i, item) # @@@@@ should be path, was object + if j < 0: + self.BadSyntax(argstr, i, "expected item in list or ')'") + List.append(self._store.intern(item[0])) + res.append(thing_type(List, self._context)) + return j + + j = self.tok("this", argstr, i) # This context + if j >= 0: + self.BadSyntax( + argstr, + i, + "Keyword 'this' was ancient N3. Now use " + + "@forSome and @forAll keywords.", + ) + + # booleans + j = self.tok("true", argstr, i) + if j >= 0: + res.append(True) + return j + j = self.tok("false", argstr, i) + if j >= 0: + res.append(False) + return j + + if subj is None: # If this can be a named node, then check for a name. 
+ j = self.uri_ref2(argstr, i, res) + if j >= 0: + return j + + return -1 + + def addingquotedRdfstarTriple(self, quoted_triple_list, dira): + if quoted_triple_list[0] == rdflib.term.URIRef('https://w3id.org/rdf-star/AssertedStatement'): + if quoted_triple_list[1] == rdflib.term.URIRef('https://w3id.org/rdf-star/QuotedStatement'): + if dira == "->": + self.makeStatement((self._context, quoted_triple_list[4], quoted_triple_list[3], quoted_triple_list[5])) + quoted_triple_list[2].setSubject(quoted_triple_list[3]) + quoted_triple_list[2].setPredicate(quoted_triple_list[4]) + quoted_triple_list[2].setObject(quoted_triple_list[5]) + + else: + self.makeStatement((self._context, quoted_triple_list[4], quoted_triple_list[5], quoted_triple_list[3])) + # quoted_triple_list[2].setSubject(quoted_triple_list[3]) + # quoted_triple_list[2].setPredicate(quoted_triple_list[4]) + # quoted_triple_list[2].setObject(quoted_triple_list[5]) + quoted_triple_list[2].setSubject(quoted_triple_list[4]) + quoted_triple_list[2].setPredicate(quoted_triple_list[5]) + quoted_triple_list[2].setObject(quoted_triple_list[6]) + + else: + if dira == "->": + self.makeStatement((self._context, quoted_triple_list[2], quoted_triple_list[1], quoted_triple_list[3])) + else: + self.makeStatement((self._context, quoted_triple_list[2], quoted_triple_list[3], quoted_triple_list[1])) + else: + if dira == "->": + quoted_triple_list[1].setSubject(quoted_triple_list[2]) + quoted_triple_list[1].setPredicate(quoted_triple_list[3]) + quoted_triple_list[1].setObject(quoted_triple_list[4]) + + else: + quoted_triple_list[1].setSubject(quoted_triple_list[2]) + quoted_triple_list[1].setPredicate(quoted_triple_list[3]) + quoted_triple_list[1].setObject(quoted_triple_list[4]) + # self.makerdfstarStatement((self._context,quoted_triple_list[1], quoted_triple_list[3], quoted_triple_list[4], quoted_triple_list[2])) # what if don't change to str + + def property_list(self, argstr: str, i: int, subj): + """Parse property list + 
Leaves the terminating punctuation in the buffer + """ + global quoted_triple_list + while 1: + while 1: # skip repeat ; + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax( + argstr, i, "EOF found when expected verb in property list" + ) + if argstr[j] != ";": + break + i = j + 1 + + if argstr[j : j + 2] == ":-": + if self.turtle: + self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") + i = j + 2 + res: typing.List[Any] = [] + j = self.node(argstr, i, res, subj) + if j < 0: + self.BadSyntax(argstr, i, "bad {} or () or [] node after :- ") + i = j + continue + i = j + v: typing.List[Any] = [] + j = self.verb(argstr, i, v) + if j <= 0: + return i # void but valid + + objs: typing.List[Any] = [] + + i = self.objectList(argstr, j, objs) + if i < 0: + self.BadSyntax(argstr, j, "objectList expected") + + for obj in objs: + dira, sym = v[0] + if "RdfstarTriple" in subj: + if "rdf-star" in str(obj): + if len(quoted_triple_list) > 2: + quoted_triple_list = [] + quoted_triple_list.append(obj) + if (rdflib.term.URIRef('https://w3id.org/rdf-star/QuotedStatement') in quoted_triple_list) & (not (subj in quoted_triple_list)): + quoted_triple_list.append(subj) + if "#object" in sym: + self.addingquotedRdfstarTriple(quoted_triple_list, dira) + else: + if dira == "->": + self.makeStatement((self._context, sym, subj, obj)) + else: + self.makeStatement((self._context, sym, obj, subj)) + + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, j, "EOF found in list of objects") + if argstr[i] != ";": + return i + i += 1 # skip semicolon and continue + + def commaSeparatedList(self, argstr: str, j, res, what): + """return value: -1 bad syntax; >1 new position in argstr + res has things found appended + """ + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "EOF found expecting comma sep list") + if argstr[i] == ".": + return j # empty list is OK + i = what(argstr, i, res) + if i < 0: + return -1 + + while 1: + j = self.skipSpace(argstr, i) 
+ if j < 0: + return j # eof + ch = argstr[j] + if ch != ",": + if ch != ".": + return -1 + return j # Found but not swallowed "." + i = what(argstr, j + 1, res) + if i < 0: + self.BadSyntax(argstr, i, "bad list content") + + def objectList(self, argstr: str, i: int, res: typing.List[Any]) -> int: + i = self.object(argstr, i, res) + if i < 0: + return -1 + while 1: + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, j, "EOF found after object") + if argstr[j] != ",": + return j # Found something else! + i = self.object(argstr, j + 1, res) + if i < 0: + return i + + def checkDot(self, argstr: str, i: int): + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + ch = argstr[j] + if ch == ".": + return j + 1 # skip + if ch == "}": + return j # don't skip it + if ch == "]": + return j + self.BadSyntax(argstr, j, "expected '.' or '}' or ']' at end of statement") + + def uri_ref2(self, argstr: str, i: int, res): + """Generate uri from n3 representation. + Note that the RDF convention of directly concatenating + NS and local name is now used though I prefer inserting a '#' + to make the namesapces look more like what XML folks expect. + """ + qn: typing.List[Any] = [] + j = self.qname(argstr, i, qn) + if j >= 0: + pfx, ln = qn[0] + if pfx is None: + assert 0, "not used?" + ns = self._baseURI + ADDED_HASH # type: ignore[unreachable] + else: + try: + ns = self._bindings[pfx] + except KeyError: + if pfx == "_": # Magic prefix 2001/05/30, can be changed + res.append(self.anonymousNode(ln)) + return j + if not self.turtle and pfx == "": + ns = join(self._baseURI or "", "#") + else: + self.BadSyntax(argstr, i, 'Prefix "%s:" not bound' % (pfx)) + symb = self._store.newSymbol(ns + ln) + res.append(self._variables.get(symb, symb)) + return j + + i = self.skipSpace(argstr, i) + if i < 0: + return -1 + + if argstr[i] == "?": + v: typing.List[Any] = [] + j = self.variable(argstr, i, v) + if j > 0: # Forget variables as a class, only in context. 
+ res.append(v[0]) + return j + return -1 + + elif argstr[i] == "<": + st = i + 1 + i = argstr.find(">", st) + if i >= 0: + uref = argstr[st:i] # the join should dealt with "": + + # expand unicode escapes + uref = unicodeEscape8.sub(unicodeExpand, uref) + uref = unicodeEscape4.sub(unicodeExpand, uref) + + if self._baseURI: + uref = join(self._baseURI, uref) # was: uripath.join + else: + assert ( + ":" in uref + ), "With no base URI, cannot deal with relative URIs" + if argstr[i - 1] == "#" and not uref[-1:] == "#": + uref += "#" # She meant it! Weirdness in urlparse? + symb = self._store.newSymbol(uref) + res.append(self._variables.get(symb, symb)) + return i + 1 + self.BadSyntax(argstr, j, "unterminated URI reference") + + elif self.keywordsSet: + v = [] + j = self.bareWord(argstr, i, v) + if j < 0: + return -1 # Forget variables as a class, only in context. + if v[0] in self.keywords: + self.BadSyntax(argstr, i, 'Keyword "%s" not allowed here.' % v[0]) + res.append(self._store.newSymbol(self._bindings[""] + v[0])) + return j + else: + return -1 + + def skipSpace(self, argstr: str, i: int): + """Skip white space, newlines and comments. + return -1 if EOF, else position of first non-ws character""" + + # Most common case is a non-commented line starting with few spaces and tabs. 
+ try: + while True: + ch = argstr[i] + if ch in {" ", "\t"}: + i += 1 + continue + elif ch not in {"#", "\r", "\n"}: + return i + break + except IndexError: + return -1 + + while 1: + m = eol.match(argstr, i) + if m is None: + break + self.lines += 1 + self.startOfLine = i = m.end() # Point to first character unmatched + m = ws.match(argstr, i) + if m is not None: + i = m.end() + m = eof.match(argstr, i) + return i if m is None else -1 + + def variable(self, argstr: str, i: int, res): + """?abc -> variable(:abc)""" + + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + + if argstr[j] != "?": + return -1 + j += 1 + i = j + if argstr[j] in numberChars: + self.BadSyntax(argstr, j, "Variable name can't start with '%s'" % argstr[j]) + len_argstr = len(argstr) + while i < len_argstr and argstr[i] not in _notKeywordsChars: + i += 1 + if self._parentContext is None: + varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) # type: ignore[operator] + if varURI not in self._variables: + self._variables[varURI] = self._context.newUniversal( + varURI, why=self._reason2 + ) + res.append(self._variables[varURI]) + return i + # @@ was: + # self.BadSyntax(argstr, j, + # "Can't use ?xxx syntax for variable in outermost level: %s" + # % argstr[j-1:i]) + varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) # type: ignore[operator] + if varURI not in self._parentVariables: + self._parentVariables[varURI] = self._parentContext.newUniversal( + varURI, why=self._reason2 + ) + res.append(self._parentVariables[varURI]) + return i + + def bareWord(self, argstr: str, i: int, res): + """abc -> :abc""" + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + + if argstr[j] in numberChars or argstr[j] in _notKeywordsChars: + return -1 + i = j + len_argstr = len(argstr) + while i < len_argstr and argstr[i] not in _notKeywordsChars: + i += 1 + res.append(argstr[j:i]) + return i + + def qname(self, argstr: str, i: int, res): + """ + xyz:def -> ('xyz', 'def') + If not 
in keywords and keywordsSet: def -> ('', 'def') + :def -> ('', 'def') + """ + + i = self.skipSpace(argstr, i) + if i < 0: + return -1 + + c = argstr[i] + if c in numberCharsPlus: + return -1 + len_argstr = len(argstr) + if c not in _notNameChars: + j = i + i += 1 + + try: + while argstr[i] not in _notNameChars: + i += 1 + except IndexError: + pass # Very rare. + + if argstr[i - 1] == ".": # qname cannot end with "." + i -= 1 + if i == j: + return -1 + ln = argstr[j:i] + + else: # First character is non-alpha + ln = "" # Was: None - TBL (why? useful?) + + if i < len_argstr and argstr[i] == ":": + pfx = ln + # bnodes names have different rules + if pfx == "_": + allowedChars = _notNameChars + else: + allowedChars = _notQNameChars + + i += 1 + lastslash = False + start = i + ln = "" + while i < len_argstr: + c = argstr[i] + if c == "\\" and not lastslash: # Very rare. + lastslash = True + if start < i: + ln += argstr[start:i] + start = i + 1 + elif c not in allowedChars or lastslash: # Most common case is "a-zA-Z" + if lastslash: + if c not in escapeChars: + raise BadSyntax( + self._thisDoc, + self.lines, + argstr, + i, + "illegal escape " + c, + ) + elif c == "%": # Very rare. + if ( + argstr[i + 1] not in hexChars + or argstr[i + 2] not in hexChars + ): + raise BadSyntax( + self._thisDoc, + self.lines, + argstr, + i, + "illegal hex escape " + c, + ) + lastslash = False + else: + break + i += 1 + + if lastslash: + raise BadSyntax( + self._thisDoc, self.lines, argstr, i, "qname cannot end with \\" + ) + + if argstr[i - 1] == ".": + # localname cannot end in . 
+ if len(ln) == 0 and start == i: + return -1 + i -= 1 + + if start < i: + ln += argstr[start:i] + + res.append((pfx, ln)) + return i + + else: # delimiter was not ":" + if ln and self.keywordsSet and ln not in self.keywords: + res.append(("", ln)) + return i + return -1 + + def object(self, argstr: str, i: int, res): + j = self.subject(argstr, i, res) + if j >= 0: + return j + else: + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + else: + i = j + + ch = argstr[i] + if ch in self.string_delimiters: + ch_three = ch * 3 + if argstr[i : i + 3] == ch_three: + delim = ch_three + i += 3 + else: + delim = ch + i += 1 + + j, s = self.strconst(argstr, i, delim) + + res.append(self._store.newLiteral(s)) # type: ignore[call-arg] # TODO FIXME + return j + else: + return -1 + + def nodeOrLiteral(self, argstr: str, i: int, res): + j = self.node(argstr, i, res) + startline = self.lines # Remember where for error messages + if j >= 0: + return j + else: + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + else: + i = j + + ch = argstr[i] + if ch in numberCharsPlus: + m = exponent_syntax.match(argstr, i) + if m: + j = m.end() + res.append(float(argstr[i:j])) + return j + + m = decimal_syntax.match(argstr, i) + if m: + j = m.end() + res.append(Decimal(argstr[i:j])) + return j + + m = integer_syntax.match(argstr, i) + if m: + j = m.end() + res.append(long_type(argstr[i:j])) + return j + + # return -1 ## or fall through? + + ch_three = ch * 3 + if ch in self.string_delimiters: + if argstr[i : i + 3] == ch_three: + delim = ch_three + i += 3 + else: + delim = ch + i += 1 + + dt = None + j, s = self.strconst(argstr, i, delim) + lang = None + if argstr[j] == "@": # Language? 
+ m = langcode.match(argstr, j + 1) + if m is None: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "Bad language code syntax on string " + "literal, after @", + ) + i = m.end() + lang = argstr[j + 1 : i] + j = i + if argstr[j : j + 2] == "^^": + res2: typing.List[Any] = [] + j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI + dt = res2[0] + res.append(self._store.newLiteral(s, dt, lang)) + return j + else: + return -1 + + def uriOf(self, sym): + if isinstance(sym, tuple): + return sym[1] # old system for --pipe + # return sym.uriref() # cwm api + return sym + + def strconst(self, argstr: str, i: int, delim): + """parse an N3 string constant delimited by delim. + return index, val + """ + delim1 = delim[0] + delim2, delim3, delim4, delim5 = delim1 * 2, delim1 * 3, delim1 * 4, delim1 * 5 + + j = i + ustr = "" # Empty unicode string + startline = self.lines # Remember where for error messages + len_argstr = len(argstr) + while j < len_argstr: + if argstr[j] == delim1: + if delim == delim1: # done when delim is " or ' + i = j + 1 + return i, ustr + if ( + delim == delim3 + ): # done when delim is """ or ''' and, respectively ... + if argstr[j : j + 5] == delim5: # ... we have "" or '' before + i = j + 5 + ustr += delim2 + return i, ustr + if argstr[j : j + 4] == delim4: # ... we have " or ' before + i = j + 4 + ustr += delim1 + return i, ustr + if argstr[j : j + 3] == delim3: # current " or ' is part of delim + i = j + 3 + return i, ustr + + # we are inside of the string and current char is " or ' + j += 1 + ustr += delim1 + continue + + m = interesting.search(argstr, j) # was argstr[j:]. + # Note for pos param to work, MUST be compiled ... re bug? 
+ assert m, "Quote expected in string at ^ in %s^%s" % ( + argstr[j - 20 : j], + argstr[j : j + 20], + ) # at least need a quote + + i = m.start() + try: + ustr += argstr[j:i] + except UnicodeError: + err = "" + for c in argstr[j:i]: + err = err + (" %02x" % ord(c)) + streason = sys.exc_info()[1].__str__() + raise BadSyntax( + self._thisDoc, + startline, + argstr, + j, + "Unicode error appending characters" + + " %s to string, because\n\t%s" % (err, streason), + ) + + # print "@@@ i = ",i, " j=",j, "m.end=", m.end() + + ch = argstr[i] + if ch == delim1: + j = i + continue + elif ch in {'"', "'"} and ch != delim1: + ustr += ch + j = i + 1 + continue + elif ch in {"\r", "\n"}: + if delim == delim1: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "newline found in string literal", + ) + self.lines += 1 + ustr += ch + j = i + 1 + self.startOfLine = j + + elif ch == "\\": + j = i + 1 + ch = argstr[j] # Will be empty if string ends + if not ch: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "unterminated string literal (2)", + ) + k = "abfrtvn\\\"'".find(ch) + if k >= 0: + uch = "\a\b\f\r\t\v\n\\\"'"[k] + ustr += uch + j += 1 + elif ch == "u": + j, ch = self.uEscape(argstr, j + 1, startline) + ustr += ch + elif ch == "U": + j, ch = self.UEscape(argstr, j + 1, startline) + ustr += ch + else: + self.BadSyntax(argstr, i, "bad escape") + + self.BadSyntax(argstr, i, "unterminated string literal") + + def _unicodeEscape(self, argstr: str, i, startline, reg, n, prefix): + if len(argstr) < i + n: + raise BadSyntax( + self._thisDoc, startline, argstr, i, "unterminated string literal(3)" + ) + try: + return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n]) + except: + raise BadSyntax( + self._thisDoc, + startline, + argstr, + i, + "bad string literal hex escape: " + argstr[i : i + n], + ) + + def uEscape(self, argstr: str, i, startline): + return self._unicodeEscape(argstr, i, startline, unicodeEscape4, 4, "u") + + def 
UEscape(self, argstr: str, i, startline): + return self._unicodeEscape(argstr, i, startline, unicodeEscape8, 8, "U") + + def BadSyntax(self, argstr: str, i, msg): + raise BadSyntax(self._thisDoc, self.lines, argstr, i, msg) + + +# If we are going to do operators then they should generate +# [ is operator:plus of ( \1 \2 ) ] + + +class BadSyntax(SyntaxError): + def __init__(self, uri, lines, argstr, i, why): + self._str = argstr.encode("utf-8") # Better go back to strings for errors + self._i = i + self._why = why + self.lines = lines + self._uri = uri + + def __str__(self): + argstr = self._str + i = self._i + st = 0 + if i > 60: + pre = "..." + st = i - 60 + else: + pre = "" + if len(argstr) - i > 60: + post = "..." + else: + post = "" + + return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' % ( + self.lines + 1, + self._uri, + self._why, + pre, + argstr[st:i], + argstr[i : i + 60], + post, + ) + + @property + def message(self): + return str(self) + + +############################################################################### +class Formula(object): + number = 0 + + def __init__(self, parent): + self.uuid = uuid4().hex + self.counter = 0 + Formula.number += 1 + self.number = Formula.number + self.existentials = {} + self.universals = {} + + self.quotedgraph = QuotedGraph(store=parent.store, identifier=self.id()) + + def __str__(self): + return "_:Formula%s" % self.number + + def id(self): + return BNode("_:Formula%s" % self.number) + + def newBlankNode(self, uri=None, why=None): + if uri is None: + self.counter += 1 + bn = BNode("f%sb%s" % (self.uuid, self.counter)) + else: + bn = BNode(uri.split("#").pop().replace("_", "b")) + return bn + + def newRdfstarTriple(self, hashvalue, uri=None, why=None): + if uri is None: + # self.counter += 1 + rdfstartriple = RdfstarTriple(hashvalue = hashvalue) + else: + rdfstartriple = RdfstarTriple(hashvalue = hashvalue) + return rdfstartriple + + def newUniversal(self, uri, why=None): + return 
Variable(uri.split("#").pop()) + + def declareExistential(self, x): + self.existentials[x] = self.newBlankNode() + + def close(self): + + return self.quotedgraph + + +r_hibyte = re.compile(r"([\x80-\xff])") + + +class RDFSink(object): + def __init__(self, graph: Graph): + self.rootFormula: Optional[Formula] = None + self.uuid = uuid4().hex + self.counter = 0 + self.graph = graph + + def newFormula(self) -> Formula: + fa = getattr(self.graph.store, "formula_aware", False) + if not fa: + raise ParserError( + "Cannot create formula parser with non-formula-aware store." + ) + f = Formula(self.graph) + return f + + def newGraph(self, identifier: Identifier) -> Graph: + return Graph(self.graph.store, identifier) + + def newSymbol(self, *args: str): + return URIRef(args[0]) + + def newBlankNode( + self, + arg: Optional[Union[Formula, Graph, Any]] = None, + uri: Optional[str] = None, + why: Optional[Callable[[], None]] = None, + ) -> BNode: + if isinstance(arg, Formula): + return arg.newBlankNode(uri) + elif isinstance(arg, Graph) or arg is None: + self.counter += 1 + bn = BNode("n%sb%s" % (self.uuid, self.counter)) + else: + bn = BNode(str(arg[0]).split("#").pop().replace("_", "b")) + return bn + + def newRdfstarTriple( + self, + # hashvalue: Optional[str], + # arg: Optional[Union[Formula, Graph, Any]] = None, + # uri: Optional[str] = None, + arg: Optional[Union[Formula, Graph, Any]] = None, + uri: Optional[str] = None, + why: Optional[Callable[[], None]] = None, + hashvalue: Optional[str] = None + ) -> RdfstarTriple: + if isinstance(arg, Formula): + return arg.newRdfstarTriple(hashvalue = hashvalue) + elif isinstance(arg, Graph) or arg is None: + # self.counter += 1 + rdfstartriple = RdfstarTriple(hashvalue =hashvalue) + else: + rdfstartriple = RdfstarTriple(hashvalue =hashvalue) + return rdfstartriple + + def newLiteral(self, s: str, dt: Optional[URIRef], lang: Optional[str]) -> Literal: + if dt: + return Literal(s, datatype=dt) + else: + return Literal(s, lang=lang) + 
+ def newList(self, n: typing.List[Any], f: Optional[Formula]): + nil = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") + if not n: + return nil + + first = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#first") + rest = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest") + af = a = self.newBlankNode(f) + + for ne in n[:-1]: + self.makeStatement((f, first, a, ne)) + an = self.newBlankNode(f) + self.makeStatement((f, rest, a, an)) + a = an + self.makeStatement((f, first, a, n[-1])) + self.makeStatement((f, rest, a, nil)) + return af + + def newSet(self, *args): + return set(args) + + def setDefaultNamespace(self, *args) -> str: + return ":".join(repr(n) for n in args) + + def makeStatement(self, quadruple, why=None) -> None: + f, p, s, o = quadruple + + if hasattr(p, "formula"): + raise ParserError("Formula used as predicate") + + s = self.normalise(f, s) + p = self.normalise(f, p) + o = self.normalise(f, o) + if f == self.rootFormula: + # print s, p, o, '.' + self.graph.add((s, p, o)) + elif isinstance(f, Formula): + f.quotedgraph.add((s, p, o)) + else: + f.add((s, p, o)) + + # return str(quadruple) + + def makerdfstarStatement(self, quadruple, why=None) -> None: + f, hashnode, p, s, o = quadruple + + if hasattr(p, "formula"): + raise ParserError("Formula used as predicate") + + s = self.normalise(f, s) + p = self.normalise(f, p) + o = self.normalise(f, o) + if f == self.rootFormula: + # print s, p, o, '.' 
+ self.graph.addStarTriple((hashnode, s, p, o)) + elif isinstance(f, Formula): + f.quotedgraph.addStarTriple((hashnode, s, p, o)) + else: + f.addStarTriple((hashnode, s, p, o)) + + # return str(quadruple) + + def normalise(self, f: Optional[Formula], n): + if isinstance(n, tuple): + return URIRef(str(n[1])) + + if isinstance(n, bool): + s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE) + return s + + if isinstance(n, int) or isinstance(n, long_type): + s = Literal(str(n), datatype=INTEGER_DATATYPE) + return s + + if isinstance(n, Decimal): + value = str(n) + if value == "-0": + value = "0" + s = Literal(value, datatype=DECIMAL_DATATYPE) + return s + + if isinstance(n, float): + s = Literal(str(n), datatype=DOUBLE_DATATYPE) + return s + + if isinstance(f, Formula): + if n in f.existentials: + return f.existentials[n] + + # if isinstance(n, Var): + # if f.universals.has_key(n): + # return f.universals[n] + # f.universals[n] = f.newBlankNode() + # return f.universals[n] + + return n + + def intern(self, something: AnyT) -> AnyT: + return something + + def bind(self, pfx, uri): + pass # print pfx, ':', uri + + def startDoc(self, formula: Optional[Formula]): + self.rootFormula = formula + + def endDoc(self, formula: Optional[Formula]) -> None: + pass + + +################################################### +# +# Utilities +# + + +def hexify(ustr): + """Use URL encoding to return an ASCII string + corresponding to the given UTF8 string + >>> hexify("http://example/a b") + b'http://example/a%20b' + """ + # s1=ustr.encode('utf-8') + s = "" + for ch in ustr: # .encode('utf-8'): + if ord(ch) > 126 or ord(ch) < 33: + ch = "%%%02X" % ord(ch) + else: + ch = "%c" % ord(ch) + s = s + ch + return s.encode("latin-1") + + +class TurtleParser(Parser): + + """ + An RDFLib parser for Turtle + See http://www.w3.org/TR/turtle/ + """ + + def __init__(self): + pass + + def parse( + self, + source: "InputSource", + graph: Graph, + encoding: Optional[str] = "utf-8", + turtle: bool = 
True, + ): + if encoding not in [None, "utf-8"]: + raise ParserError( + "N3/Turtle files are always utf-8 encoded, I was passed: %s" % encoding + ) + + sink = RDFSink(graph) + + baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "") + p = SinkParser(sink, baseURI=baseURI, turtle=turtle) + # N3 parser prefers str stream + # stream = source.getCharacterStream() + # if not stream: + # stream = source.getByteStream() + # p.loadStream(stream) + + if hasattr(source, "file"): + f = open(source.file.name, "rb") + rdbytes = f.read() + f.close() + elif hasattr(source, "_InputSource__bytefile"): + if hasattr(source._InputSource__bytefile, "wrapped"): + f = open((source._InputSource__bytefile.wrapped.strip().splitlines())[0], "rb") # what if multiple files + rdbytes = f.read() + f.close() + + bp = rdbytes.decode("utf-8") + ou = RDFstarParsings(bp) + p.feed(ou) + p.endDoc() + for prefix, namespace in p._bindings.items(): + graph.bind(prefix, namespace) + + +class N3Parser(TurtleParser): + + """ + An RDFLib parser for Notation3 + See http://www.w3.org/DesignIssues/Notation3.html + """ + + def __init__(self): + pass + + def parse(self, source, graph, encoding="utf-8"): + # we're currently being handed a Graph, not a ConjunctiveGraph + # context-aware is this implied by formula_aware + ca = getattr(graph.store, "context_aware", False) + fa = getattr(graph.store, "formula_aware", False) + if not ca: + raise ParserError("Cannot parse N3 into non-context-aware store.") + elif not fa: + raise ParserError("Cannot parse N3 into non-formula-aware store.") + + conj_graph = ConjunctiveGraph(store=graph.store) + conj_graph.default_context = graph # TODO: CG __init__ should have a + # default_context arg + # TODO: update N3Processor so that it can use conj_graph as the sink + conj_graph.namespace_manager = graph.namespace_manager + + TurtleParser.parse(self, source, conj_graph, encoding, turtle=False) diff --git a/rdflib/plugins/serializers/ntriples-star.py 
b/rdflib/plugins/serializers/ntriples-star.py new file mode 100644 index 000000000..0e03196ea --- /dev/null +++ b/rdflib/plugins/serializers/ntriples-star.py @@ -0,0 +1,527 @@ +""" +HextuplesSerializer RDF graph serializer for RDFLib. +See for details about the format. +""" +# from this import d +from typing import IO, Optional, Type, Union +import json +from rdflib.graph import Graph, ConjunctiveGraph +from rdflib.term import Literal, URIRef, Node, BNode, RdfstarTriple +from rdflib.serializer import Serializer +from rdflib.namespace import RDF, XSD +import warnings +import rdflib + +__all__ = ["NtriplesStarSerializer"] +from rdflib import Namespace, Graph +RDFSTAR = Namespace("https://w3id.org/rdf-star/") + +class NtriplesStarSerializer(Serializer): + """ + Serializes RDF graphs to NTriples format. + """ + + def __init__(self, store: Union[Graph, ConjunctiveGraph]): + self.default_context: Optional[Node] + self.graph_type: Type[Graph] + if isinstance(store, ConjunctiveGraph): + self.graph_type = ConjunctiveGraph + self.contexts = list(store.contexts()) + if store.default_context: + self.default_context = store.default_context + self.contexts.append(store.default_context) + else: + self.default_context = None + else: + self.graph_type = Graph + self.contexts = [store] + self.default_context = None + + Serializer.__init__(self, store) + + def serialize( + self, + stream: IO[bytes], + base: Optional[str] = None, + encoding: Optional[str] = "utf-8", + **kwargs, + ): + if base is not None: + warnings.warn( + "base has no meaning for Hextuples serialization. " + "I will ignore this value" + ) + + if encoding not in [None, "utf-8"]: + warnings.warn( + f"Hextuples files are always utf-8 encoded. " + f"I was passed: {encoding}, " + "but I'm still going to use utf-8 anyway!" 
+ ) + + if self.store.formula_aware is True: + raise Exception( + "Hextuple serialization can't (yet) handle formula-aware stores" + ) + dictionary = {} + blanknode_dictionary = {} + result_subject = "" + result_object = "" + + def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary): + quoted_Bnode_or_not = False + # print("update_dictionary_RdfstarTriple", node ) + if type(node) == rdflib.term.BNode: + for s, p, o in g.triples((node, None, None)): + if (isinstance(s, rdflib.term.BNode) & (not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + # print("here", node) + if isinstance(p, rdflib.term.URIRef): + p = "<"+str(p)+">" + elif isinstance(p, rdflib.term.Literal): + p = p._literal_n3(use_plain=True) + + if isinstance(s, rdflib.term.BNode): + s = "_:"+str(s) + + if isinstance(o, rdflib.term.URIRef): + o = "<"+str(o)+">" + elif isinstance(o, rdflib.term.Literal): + o = o._literal_n3(use_plain=True) + elif isinstance(o, rdflib.term.BNode): + o = "_:"+str(o) + + if not (node in blanknode_dictionary): + + blanknode_dictionary[node] = [p, o] + + elif ((p in blanknode_dictionary[node]) & (o in blanknode_dictionary[node])): + pass + else: + + blanknode_dictionary[node].append(";") + blanknode_dictionary[node].append(p) + blanknode_dictionary[node].append(o) + + else: + + if ("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p) or ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + collection_or_not = True + quoted_Bnode_or_not = False + + if o in dictionary: + properties.append(dictionary[o]) + elif not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil" in o): + if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p) and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append("(") + expand_Bnode_and_RdfstarTriple(o, g, 
dictionary,properties, collection_or_not, quoted_Bnode_or_not) + + if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p)and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append(")") + + else: + if ((not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + + else: + collection_or_not = False + quoted_Bnode_or_not = False + + if (isinstance(p, rdflib.term.URIRef)): + p = "<"+str(p)+">" + elif isinstance(p, rdflib.term.Literal): + p = p._literal_n3(use_plain=True) + + pass + properties.append(p) + if o in dictionary: + properties.append(dictionary[o]) + else: + update_dictionary_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + + if type(node) == rdflib.term.RdfstarTriple: + collection_or_not = False + quoted_Bnode_or_not = True + if node in dictionary: + pass + else: + + subject = node.subject() + predicate = node.predicate() + object = node.object() + + if subject in dictionary: + subject = dictionary[subject] + + if object in dictionary: + object = dictionary[object] + + subjectexpandable = ((type(subject) == rdflib.term.BNode) or (type(subject) == rdflib.term.RdfstarTriple)) + objectexpandable = ((type(object) == rdflib.term.BNode) or (type(object) == rdflib.term.RdfstarTriple)) + + if (isinstance(subject, rdflib.term.URIRef)): + # print("tttttttttttuuuuuuuuuuuuuu") + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.BNode): + subject = "_:"+str(subject) + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + + if (isinstance(object, rdflib.term.URIRef)): + # print("tttttttttttuuuuuuuuuuuuuu") + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif isinstance(object, rdflib.term.BNode): + object = "_:"+str(object) 
+ if isinstance(predicate, rdflib.term.URIRef): + predicate = "<"+str(predicate)+">" + + if subjectexpandable: + result_object, ifcollection, ifquotedBnode, d1 = update_dictionary_RdfstarTriple(subject, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + if isinstance(subject, rdflib.term.RdfstarTriple): + subject = d1[subject] + elif isinstance(subject, rdflib.term.BNode): + subject = "_:"+str(subject) + + if objectexpandable: + result_object, ifcollection, ifquotedBnode, d2 = update_dictionary_RdfstarTriple(object, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + if isinstance(object, rdflib.term.RdfstarTriple): + object = d2[object] + elif isinstance(object, rdflib.term.BNode): + object = "_:"+str(object) + + if ((not subjectexpandable) and (not objectexpandable)): + + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + + if node not in dictionary: + + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + + else: + pass + return properties, collection_or_not, quoted_Bnode_or_not, dictionary + + def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_or_not, quoted_Bnode_or_not): + + quoted_Bnode_or_not = False + if type(node) == rdflib.term.BNode: + for s, p, o in g.triples((node, None, None)): + if (isinstance(s, rdflib.term.BNode) & (not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + else: + + if ("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p) or ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + collection_or_not = True + quoted_Bnode_or_not = False + if o in dictionary: + properties.append(dictionary[o]) + + elif not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil" in o): + + if (not 
("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p) and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append("(") + + expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) + + if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p)and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append(")") + + else: + + if ((not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + + else: + collection_or_not = False + quoted_Bnode_or_not = False + + if (isinstance(p, rdflib.term.URIRef)): + p = "<"+str(p)+">" + elif isinstance(p, rdflib.term.Literal): + p = p._literal_n3(use_plain=True) + pass + properties.append(p) + if o in dictionary: + properties.append(dictionary[o]) + else: + expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) + + if type(node) == rdflib.term.RdfstarTriple: + ollection_or_not = False + quoted_Bnode_or_not = True + if node in dictionary: + properties.append(dictionary[node]) + + else: + + subject = node.subject() + predicate = node.predicate() + object = node.object() + if subject in dictionary: + + subject = dictionary[subject] + if object in dictionary: + + object = dictionary[object] + subjectexpandable = ((type(subject) == rdflib.term.BNode) or (type(subject) == rdflib.term.RdfstarTriple)) + objectexpandable = ((type(object) == rdflib.term.BNode) or (type(object) == rdflib.term.RdfstarTriple)) + + if (isinstance(subject, rdflib.term.URIRef)): + + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + elif isinstance(subject, rdflib.term.RdfstarTriple): + subject = dictionary[subject] + elif isinstance(subject, rdflib.term.BNode): + + + if subject in blanknode_dictionary: + subject = 
"["+"".join(blanknode_dictionary[subject])+"]" + else: + + subject = "_:"+str(subject) + + + if (isinstance(object, rdflib.term.URIRef)): + + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif isinstance(object, rdflib.term.RdfstarTriple): + object = dictionary[object] + elif isinstance(object, rdflib.term.BNode): + if object in blanknode_dictionary: + object = "["+"".join(blanknode_dictionary[object])+"]" + else: + object = "_:"+str(object) + + if isinstance(predicate, rdflib.term.URIRef): + predicate = "<"+str(predicate)+">" + + if ((not subjectexpandable) and (not objectexpandable)): + + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + if node not in dictionary: + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + properties.append("<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>") + else: + properties.append(dictionary[node]) + + return properties, collection_or_not, quoted_Bnode_or_not, dictionary + + # this loop is for updating the quoted triple dictionary and blank node dictionary + for g in self.contexts: + + for s,p,o in g.triples((None, None, None)): + + if (isinstance(s, rdflib.term.BNode) & (isinstance(o, rdflib.term.BNode) or isinstance(o, rdflib.term.RdfstarTriple) or isinstance(p, rdflib.term.BNode) or isinstance(p, rdflib.term.RdfstarTriple))): + pass + elif("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p or "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + pass + else: + subject = s + predicate = p + object = o + + properties = [] + collection_or_not = False + quoted_Bnode_or_not = False + if (isinstance(subject, rdflib.term.URIRef)): + + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + elif (isinstance(subject, rdflib.term.BNode) or isinstance(subject, 
rdflib.term.RdfstarTriple)): + thenode_id = str(subject) + + result_subject, ifcollection, ifquotedBnode, dictionary = update_dictionary_RdfstarTriple(subject,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + + if (not len(result_subject) == 0): + if ifcollection == True: + result_subject.insert(0, "(") + result_subject.append(")") + elif subject in blanknode_dictionary: + subject = "["+"".join(blanknode_dictionary[subject])+"]" + elif ifquotedBnode: + pass + else: + + result_subject.insert(0, "[") + result_subject.append("]") + subject = "".join(result_subject) + + else: + + + subject = "[]" + if subject == "[]": + + subject = " _:"+thenode_id + + + if (isinstance(object, rdflib.term.URIRef)): + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif (isinstance(object, rdflib.term.BNode) or isinstance(object, rdflib.term.RdfstarTriple)): + thenode_id = str(object) + result_object, ifcollection, ifquotedBnode, dictionary = update_dictionary_RdfstarTriple(object,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + + if (not len(result_object) == 0): + if ifcollection == True: + result_object.insert(0, "(") + result_object.append(")") + + elif object in blanknode_dictionary: + object = "["+"".join(blanknode_dictionary[object])+"]" + elif ifquotedBnode: + pass + else: + result_object.insert(0, "[") + result_object.append("]") + object = "".join(result_object) + + else: + object = "[]" + if object == "[]": + + object = " _:"+thenode_id + + + if(isinstance(predicate, rdflib.term.URIRef)): + predicate = "<"+str(predicate)+">" + + # this loop is for serializing results + for g in self.contexts: + + for s,p,o in g.triples((None, None, None)): + + if s in blanknode_dictionary: + + re1 = False + re2 = False + if len(blanknode_dictionary[s]) < 4: + + re2 = True + + else: + re2 = False + re1 = True + + + if re1 or re2: + if 
(isinstance(s, rdflib.term.BNode) & (isinstance(o, rdflib.term.BNode) or isinstance(o, rdflib.term.RdfstarTriple) or isinstance(p, rdflib.term.BNode) or isinstance(p, rdflib.term.RdfstarTriple))): + pass + elif("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p or "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + pass + else: + + subject = s + predicate = p + object = o + + + properties = [] + collection_or_not = False + quoted_Bnode_or_not = False + + if (isinstance(subject, rdflib.term.URIRef)): + + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + elif (isinstance(subject, rdflib.term.BNode) or isinstance(subject, rdflib.term.RdfstarTriple)): + thenode_id = str(subject) + + result_subject, ifcollection, ifquotedBnode, d = expand_Bnode_and_RdfstarTriple(subject,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not) + + if (not len(result_subject) == 0): + if ifcollection == True: + result_subject.insert(0, "(") + result_subject.append(")") + + elif ifquotedBnode: + pass + else: + + result_subject.insert(0, "[") + result_subject.append("]") + subject = "".join(result_subject) + else: + if (subject in blanknode_dictionary): + if(len(blanknode_dictionary[subject])>2): + subject = "["+"".join(blanknode_dictionary[subject])+"]" + else: + subject = "[]" + else: + subject = "[]" + if subject == "[]": + + subject = " _:"+thenode_id + properties = [] + + + if (isinstance(object, rdflib.term.URIRef)): + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif (isinstance(object, rdflib.term.BNode) or isinstance(object, rdflib.term.RdfstarTriple)): + thenode_id = str(object) + result_object, ifcollection, ifquotedBnode, d = expand_Bnode_and_RdfstarTriple(object,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not) + + + if (not len(result_object) == 0): + if ifcollection == True: + 
result_object.insert(0, "(") + result_object.append(")") + # elif ifquotedBnode: + + elif ifquotedBnode: + pass + else: + result_object.insert(0, "[") + result_object.append("]") + object = "".join(result_object) + else: + if (object in blanknode_dictionary): + if(len(blanknode_dictionary[object])>2): + object = "["+"".join(blanknode_dictionary[object])+"]" + else: + object = "[]" + else: + object = "[]" + + if object == "[]": + + object = " _:"+thenode_id + properties = [] + + if(isinstance(predicate, rdflib.term.URIRef)): + predicate = "<"+str(predicate)+">" + + output = subject+" "+predicate+" "+object+" ."+"\n" + if output is not None: + stream.write(output.encode()) + + + + def _iri_or_bn(self, i_): + if isinstance(i_, URIRef): + return f"{i_}" + elif isinstance(i_, BNode): + return f"{i_.n3()}" + else: + return None + + def _context(self, context): + if self.graph_type == Graph: + return "" + if context.identifier == "urn:x-rdflib:default": + return "" + elif context is not None and self.default_context is not None: + if context.identifier == self.default_context.identifier: + return "" + return context.identifier diff --git a/rdflib/plugins/serializers/trigstar.py b/rdflib/plugins/serializers/trigstar.py new file mode 100644 index 000000000..33b59a049 --- /dev/null +++ b/rdflib/plugins/serializers/trigstar.py @@ -0,0 +1,552 @@ +""" +HextuplesSerializer RDF graph serializer for RDFLib. +See for details about the format. 
+""" +# from this import d +from typing import IO, Optional, Type, Union +import json +from rdflib.graph import Graph, ConjunctiveGraph +from rdflib.term import Literal, URIRef, Node, BNode, RdfstarTriple +from rdflib.serializer import Serializer +from rdflib.namespace import RDF, XSD +import warnings +import rdflib +import hashlib + +def myHash(text:str): + return str(hashlib.md5(text.encode('utf-8')).hexdigest()) + +__all__ = ["TrigstarSerializer"] +from rdflib import Namespace, Graph +RDFSTAR = Namespace("https://w3id.org/rdf-star/") + +class TrigstarSerializer(Serializer): + """ + Serializes RDF graphs to NTriples format. + """ + + def __init__(self, store: Union[Graph, ConjunctiveGraph]): + self.default_context: Optional[Node] + self.graph_type: Type[Graph] + if isinstance(store, ConjunctiveGraph): + self.graph_type = ConjunctiveGraph + self.contexts = list(store.contexts()) + if store.default_context: + self.default_context = store.default_context + self.contexts.append(store.default_context) + else: + self.default_context = None + else: + self.graph_type = Graph + self.contexts = [store] + self.default_context = None + + Serializer.__init__(self, store) + + def serialize( + self, + stream: IO[bytes], + base: Optional[str] = None, + encoding: Optional[str] = "utf-8", + **kwargs, + ): + if base is not None: + warnings.warn( + "base has no meaning for Hextuples serialization. " + "I will ignore this value" + ) + + if encoding not in [None, "utf-8"]: + warnings.warn( + f"Hextuples files are always utf-8 encoded. " + f"I was passed: {encoding}, " + "but I'm still going to use utf-8 anyway!" 
+ ) + + if self.store.formula_aware is True: + raise Exception( + "Hextuple serialization can't (yet) handle formula-aware stores" + ) + dictionary = {} + blanknode_dictionary = {} + result_subject = "" + result_object = "" + output = "" + def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary): + quoted_Bnode_or_not = False + if type(node) == rdflib.term.BNode: + for s, p, o in g.triples((node, None, None)): + if (isinstance(s, rdflib.term.BNode) & (not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + if isinstance(p, rdflib.term.URIRef): + p = "<"+str(p)+">" + elif isinstance(p, rdflib.term.Literal): + p = p._literal_n3(use_plain=True) + + if isinstance(s, rdflib.term.BNode): + s = "_:"+str(s) + + if isinstance(o, rdflib.term.URIRef): + o = "<"+str(o)+">" + elif isinstance(o, rdflib.term.Literal): + o = o._literal_n3(use_plain=True) + elif isinstance(o, rdflib.term.BNode): + o = "_:"+str(o) + + if not (node in blanknode_dictionary): + + blanknode_dictionary[node] = [p, o] + + elif ((p in blanknode_dictionary[node]) & (o in blanknode_dictionary[node])): + pass + else: + + blanknode_dictionary[node].append(";") + blanknode_dictionary[node].append(p) + blanknode_dictionary[node].append(o) + + else: + + if ("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p) or ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + collection_or_not = True + quoted_Bnode_or_not = False + + if o in dictionary: + properties.append(dictionary[o]) + + elif not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil" in o): + + if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p) and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append("(") + + expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) + + if 
(not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p)and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append(")") + + else: + + if ((not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + + else: + collection_or_not = False + quoted_Bnode_or_not = False + + if (isinstance(p, rdflib.term.URIRef)): + p = "<"+str(p)+">" + elif isinstance(p, rdflib.term.Literal): + p = p._literal_n3(use_plain=True) + + + pass + properties.append(p) + if o in dictionary: + properties.append(dictionary[o]) + + else: + + update_dictionary_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + + + if type(node) == rdflib.term.RdfstarTriple: + + collection_or_not = False + quoted_Bnode_or_not = True + if node in dictionary: + + pass + else: + + subject = node.subject() + predicate = node.predicate() + object = node.object() + + if subject in dictionary: + + subject = dictionary[subject] + if object in dictionary: + + object = dictionary[object] + subjectexpandable = ((type(subject) == rdflib.term.BNode) or (type(subject) == rdflib.term.RdfstarTriple)) + objectexpandable = ((type(object) == rdflib.term.BNode) or (type(object) == rdflib.term.RdfstarTriple)) + + if (isinstance(subject, rdflib.term.URIRef)): + + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.BNode): + subject = "_:"+str(subject) + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + + if (isinstance(object, rdflib.term.URIRef)): + + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif isinstance(object, rdflib.term.BNode): + object = "_:"+str(object) + if isinstance(predicate, rdflib.term.URIRef): + predicate = "<"+str(predicate)+">" + + if subjectexpandable: + result_object, 
ifcollection, ifquotedBnode, d1 = update_dictionary_RdfstarTriple(subject, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + if isinstance(subject, rdflib.term.RdfstarTriple): + subject = d1[subject] + elif isinstance(subject, rdflib.term.BNode): + subject = "_:"+str(subject) + + if objectexpandable: + result_object, ifcollection, ifquotedBnode, d2 = update_dictionary_RdfstarTriple(object, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + if isinstance(object, rdflib.term.RdfstarTriple): + object = d2[object] + elif isinstance(object, rdflib.term.BNode): + object = "_:"+str(object) + + if ((not subjectexpandable) and (not objectexpandable)): + + + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + + if node not in dictionary: + + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + + else: + + pass + return properties, collection_or_not, quoted_Bnode_or_not, dictionary + + def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_or_not, quoted_Bnode_or_not): + + quoted_Bnode_or_not = False + if type(node) == rdflib.term.BNode: + for s, p, o in g.triples((node, None, None)): + if (isinstance(s, rdflib.term.BNode) & (not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + else: + + + if ("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p) or ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + collection_or_not = True + quoted_Bnode_or_not = False + + if o in dictionary: + properties.append(dictionary[o]) + + elif not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil" in o): + + if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p) and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append("(") + + 
expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) + + if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p)and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append(")") + + else: + + if ((not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + + else: + collection_or_not = False + quoted_Bnode_or_not = False + + if (isinstance(p, rdflib.term.URIRef)): + p = "<"+str(p)+">" + elif isinstance(p, rdflib.term.Literal): + p = p._literal_n3(use_plain=True) + + + pass + properties.append(p) + if o in dictionary: + properties.append(dictionary[o]) + + else: + + expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) + + if type(node) == rdflib.term.RdfstarTriple: + + collection_or_not = False + quoted_Bnode_or_not = True + if node in dictionary: + + properties.append(dictionary[node]) + + else: + + subject = node.subject() + predicate = node.predicate() + object = node.object() + if subject in dictionary: + + subject = dictionary[subject] + if object in dictionary: + + object = dictionary[object] + subjectexpandable = ((type(subject) == rdflib.term.BNode) or (type(subject) == rdflib.term.RdfstarTriple)) + objectexpandable = ((type(object) == rdflib.term.BNode) or (type(object) == rdflib.term.RdfstarTriple)) + + if (isinstance(subject, rdflib.term.URIRef)): + + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + elif isinstance(subject, rdflib.term.RdfstarTriple): + subject = dictionary[subject] + elif isinstance(subject, rdflib.term.BNode): + + if subject in blanknode_dictionary: + subject = "["+"".join(blanknode_dictionary[subject])+"]" + else: + + subject = "_:"+str(subject) + + + if (isinstance(object, rdflib.term.URIRef)): + + object = 
"<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif isinstance(object, rdflib.term.RdfstarTriple): + object = dictionary[object] + elif isinstance(object, rdflib.term.BNode): + if object in blanknode_dictionary: + object = "["+"".join(blanknode_dictionary[object])+"]" + else: + object = "_:"+str(object) + + if isinstance(predicate, rdflib.term.URIRef): + predicate = "<"+str(predicate)+">" + + + + if ((not subjectexpandable) and (not objectexpandable)): + + + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + + if node not in dictionary: + + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + properties.append("<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>") + + else: + properties.append(dictionary[node]) + + return properties, collection_or_not, quoted_Bnode_or_not, dictionary + + # this loop is for updating the quoted triple dictionary and blank node dictionary + for g in self.contexts: + + for s,p,o in g.triples((None, None, None)): + + if (isinstance(s, rdflib.term.BNode) & (isinstance(o, rdflib.term.BNode) or isinstance(o, rdflib.term.RdfstarTriple) or isinstance(p, rdflib.term.BNode) or isinstance(p, rdflib.term.RdfstarTriple))): + pass + elif("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p or "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + pass + else: + subject = s + predicate = p + object = o + + properties = [] + collection_or_not = False + quoted_Bnode_or_not = False + if (isinstance(subject, rdflib.term.URIRef)): + + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + elif (isinstance(subject, rdflib.term.BNode) or isinstance(subject, rdflib.term.RdfstarTriple)): + thenode_id = str(subject) + + result_subject, ifcollection, ifquotedBnode, dictionary = 
update_dictionary_RdfstarTriple(subject,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + + if (not len(result_subject) == 0): + if ifcollection == True: + result_subject.insert(0, "(") + result_subject.append(")") + elif subject in blanknode_dictionary: + subject = "["+"".join(blanknode_dictionary[subject])+"]" + elif ifquotedBnode: + pass + else: + + result_subject.insert(0, "[") + result_subject.append("]") + subject = "".join(result_subject) + + else: + + # else: + subject = "[]" + if subject == "[]": + + subject = " _:"+thenode_id + + + if (isinstance(object, rdflib.term.URIRef)): + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif (isinstance(object, rdflib.term.BNode) or isinstance(object, rdflib.term.RdfstarTriple)): + thenode_id = str(object) + result_object, ifcollection, ifquotedBnode, dictionary = update_dictionary_RdfstarTriple(object,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + + if (not len(result_object) == 0): + if ifcollection == True: + result_object.insert(0, "(") + result_object.append(")") + + elif object in blanknode_dictionary: + object = "["+"".join(blanknode_dictionary[object])+"]" + elif ifquotedBnode: + pass + else: + result_object.insert(0, "[") + result_object.append("]") + object = "".join(result_object) + + else: + object = "[]" + if object == "[]": + + object = " _:"+thenode_id + + + if(isinstance(predicate, rdflib.term.URIRef)): + predicate = "<"+str(predicate)+">" + + # this loop is for serializing results + for g in self.contexts: + + for s,p,o in g.triples((None, None, None)): + + if s in blanknode_dictionary: + + re1 = False + re2 = False + if len(blanknode_dictionary[s]) < 4: + + re2 = True + + else: + re2 = False + re1 = True + + if re1 or re2: + if (isinstance(s, rdflib.term.BNode) & (isinstance(o, rdflib.term.BNode) or isinstance(o, rdflib.term.RdfstarTriple) or 
isinstance(p, rdflib.term.BNode) or isinstance(p, rdflib.term.RdfstarTriple))): + pass + elif("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p or "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + pass + else: + + subject = s + predicate = p + object = o + + + properties = [] + collection_or_not = False + quoted_Bnode_or_not = False + + if (isinstance(subject, rdflib.term.URIRef)): + + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + elif (isinstance(subject, rdflib.term.BNode) or isinstance(subject, rdflib.term.RdfstarTriple)): + thenode_id = str(subject) + + result_subject, ifcollection, ifquotedBnode, d = expand_Bnode_and_RdfstarTriple(subject,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not) + + if (not len(result_subject) == 0): + if ifcollection == True: + result_subject.insert(0, "(") + result_subject.append(")") + + elif ifquotedBnode: + pass + else: + + result_subject.insert(0, "[") + result_subject.append("]") + subject = "".join(result_subject) + else: + if (subject in blanknode_dictionary): + if(len(blanknode_dictionary[subject])>2): + subject = "["+"".join(blanknode_dictionary[subject])+"]" + else: + subject = "[]" + else: + subject = "[]" + if subject == "[]": + + subject = " _:"+thenode_id + properties = [] + + + if (isinstance(object, rdflib.term.URIRef)): + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif (isinstance(object, rdflib.term.BNode) or isinstance(object, rdflib.term.RdfstarTriple)): + thenode_id = str(object) + result_object, ifcollection, ifquotedBnode, d = expand_Bnode_and_RdfstarTriple(object,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not) + + + if (not len(result_object) == 0): + if ifcollection == True: + result_object.insert(0, "(") + result_object.append(")") + + elif ifquotedBnode: + pass + else: + result_object.insert(0, "[") 
+ result_object.append("]") + object = "".join(result_object) + else: + if (object in blanknode_dictionary): + if(len(blanknode_dictionary[object])>2): + object = "["+"".join(blanknode_dictionary[object])+"]" + else: + object = "[]" + else: + object = "[]" + + if object == "[]": + + object = " _:"+thenode_id + properties = [] + + if(isinstance(predicate, rdflib.term.URIRef)): + predicate = "<"+str(predicate)+">" + + output = output+subject+" "+predicate+" "+object+" ."+"\n" + + if output is not None: + output = "_:"+str(myHash(output))+ "{\n"+ output + "}" + stream.write(output.encode()) + + def _iri_or_bn(self, i_): + if isinstance(i_, URIRef): + return f"{i_}" + elif isinstance(i_, BNode): + return f"{i_.n3()}" + else: + return None + + def _context(self, context): + if self.graph_type == Graph: + return "" + if context.identifier == "urn:x-rdflib:default": + return "" + elif context is not None and self.default_context is not None: + if context.identifier == self.default_context.identifier: + return "" + return context.identifier diff --git a/rdflib/plugins/serializers/turtlestar.py b/rdflib/plugins/serializers/turtlestar.py new file mode 100644 index 000000000..ee8dc0f11 --- /dev/null +++ b/rdflib/plugins/serializers/turtlestar.py @@ -0,0 +1,547 @@ +""" +HextuplesSerializer RDF graph serializer for RDFLib. +See for details about the format. +""" +# from this import d +from typing import IO, Optional, Type, Union +import json +from rdflib.graph import Graph, ConjunctiveGraph +from rdflib.term import Literal, URIRef, Node, BNode, RdfstarTriple +from rdflib.serializer import Serializer +from rdflib.namespace import RDF, XSD +import warnings +import rdflib + +__all__ = ["TurtlestarSerializer"] +from rdflib import Namespace, Graph +RDFSTAR = Namespace("https://w3id.org/rdf-star/") + +class TurtlestarSerializer(Serializer): + """ + Serializes RDF graphs to NTriples format. 
+ """ + + def __init__(self, store: Union[Graph, ConjunctiveGraph]): + self.default_context: Optional[Node] + self.graph_type: Type[Graph] + if isinstance(store, ConjunctiveGraph): + self.graph_type = ConjunctiveGraph + self.contexts = list(store.contexts()) + if store.default_context: + self.default_context = store.default_context + self.contexts.append(store.default_context) + else: + self.default_context = None + else: + self.graph_type = Graph + self.contexts = [store] + self.default_context = None + + Serializer.__init__(self, store) + + def serialize( + self, + stream: IO[bytes], + base: Optional[str] = None, + encoding: Optional[str] = "utf-8", + **kwargs, + ): + if base is not None: + warnings.warn( + "base has no meaning for Hextuples serialization. " + "I will ignore this value" + ) + + if encoding not in [None, "utf-8"]: + warnings.warn( + f"Hextuples files are always utf-8 encoded. " + f"I was passed: {encoding}, " + "but I'm still going to use utf-8 anyway!" + ) + + if self.store.formula_aware is True: + raise Exception( + "Hextuple serialization can't (yet) handle formula-aware stores" + ) + dictionary = {} + blanknode_dictionary = {} + result_subject = "" + result_object = "" + + def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary): + quoted_Bnode_or_not = False + + if type(node) == rdflib.term.BNode: + for s, p, o in g.triples((node, None, None)): + if (isinstance(s, rdflib.term.BNode) & (not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + # print("here", node) + if isinstance(p, rdflib.term.URIRef): + p = "<"+str(p)+">" + elif isinstance(p, rdflib.term.Literal): + p = p._literal_n3(use_plain=True) + + if isinstance(s, rdflib.term.BNode): + s = "_:"+str(s) + + if isinstance(o, rdflib.term.URIRef): + o = "<"+str(o)+">" + elif 
isinstance(o, rdflib.term.Literal): + o = o._literal_n3(use_plain=True) + elif isinstance(o, rdflib.term.BNode): + o = "_:"+str(o) + + if not (node in blanknode_dictionary): + + + blanknode_dictionary[node] = [p, o] + + elif ((p in blanknode_dictionary[node]) & (o in blanknode_dictionary[node])): + pass + else: + + blanknode_dictionary[node].append(";") + blanknode_dictionary[node].append(p) + blanknode_dictionary[node].append(o) + + else: + + if ("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p) or ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + collection_or_not = True + quoted_Bnode_or_not = False + + if o in dictionary: + properties.append(dictionary[o]) + + + elif not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil" in o): + + if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p) and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append("(") + + expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) + + if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p)and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append(")") + + else: + + if ((not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + + else: + collection_or_not = False + quoted_Bnode_or_not = False + + if (isinstance(p, rdflib.term.URIRef)): + p = "<"+str(p)+">" + elif isinstance(p, rdflib.term.Literal): + p = p._literal_n3(use_plain=True) + + + pass + properties.append(p) + if o in dictionary: + properties.append(dictionary[o]) + + else: + + update_dictionary_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + + + if type(node) == rdflib.term.RdfstarTriple: + + collection_or_not = False + quoted_Bnode_or_not = True + if node in dictionary: + + pass + else: + + + subject = node.subject() + predicate = 
node.predicate() + object = node.object() + + if subject in dictionary: + + subject = dictionary[subject] + if object in dictionary: + + object = dictionary[object] + subjectexpandable = ((type(subject) == rdflib.term.BNode) or (type(subject) == rdflib.term.RdfstarTriple)) + objectexpandable = ((type(object) == rdflib.term.BNode) or (type(object) == rdflib.term.RdfstarTriple)) + + if (isinstance(subject, rdflib.term.URIRef)): + + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.BNode): + subject = "_:"+str(subject) + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + + if (isinstance(object, rdflib.term.URIRef)): + + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif isinstance(object, rdflib.term.BNode): + object = "_:"+str(object) + if isinstance(predicate, rdflib.term.URIRef): + predicate = "<"+str(predicate)+">" + + if subjectexpandable: + result_object, ifcollection, ifquotedBnode, d1 = update_dictionary_RdfstarTriple(subject, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + if isinstance(subject, rdflib.term.RdfstarTriple): + subject = d1[subject] + elif isinstance(subject, rdflib.term.BNode): + subject = "_:"+str(subject) + + if objectexpandable: + result_object, ifcollection, ifquotedBnode, d2 = update_dictionary_RdfstarTriple(object, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + if isinstance(object, rdflib.term.RdfstarTriple): + object = d2[object] + elif isinstance(object, rdflib.term.BNode): + object = "_:"+str(object) + + if ((not subjectexpandable) and (not objectexpandable)): + + + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + + if node not in dictionary: + + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + + else: + + pass + return 
properties, collection_or_not, quoted_Bnode_or_not, dictionary + + def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_or_not, quoted_Bnode_or_not): + + quoted_Bnode_or_not = False + if type(node) == rdflib.term.BNode: + for s, p, o in g.triples((node, None, None)): + if (isinstance(s, rdflib.term.BNode) & (not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + else: + + if ("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p) or ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + collection_or_not = True + quoted_Bnode_or_not = False + + if o in dictionary: + properties.append(dictionary[o]) + + elif not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil" in o): + + if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p) and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append("(") + + expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) + + if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p)and (not isinstance(o, rdflib.term.RdfstarTriple))): + properties.append(")") + + else: + + if ((not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): + pass + + else: + collection_or_not = False + quoted_Bnode_or_not = False + if (isinstance(p, rdflib.term.URIRef)): + p = "<"+str(p)+">" + elif isinstance(p, rdflib.term.Literal): + p = p._literal_n3(use_plain=True) + + + pass + properties.append(p) + if o in dictionary: + properties.append(dictionary[o]) + + else: + + expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) + + + if type(node) == rdflib.term.RdfstarTriple: + + collection_or_not = False + quoted_Bnode_or_not = True + if node in dictionary: + 
+ properties.append(dictionary[node]) + + else: + + subject = node.subject() + predicate = node.predicate() + object = node.object() + if subject in dictionary: + + subject = dictionary[subject] + if object in dictionary: + + object = dictionary[object] + subjectexpandable = ((type(subject) == rdflib.term.BNode) or (type(subject) == rdflib.term.RdfstarTriple)) + objectexpandable = ((type(object) == rdflib.term.BNode) or (type(object) == rdflib.term.RdfstarTriple)) + + if (isinstance(subject, rdflib.term.URIRef)): + + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + elif isinstance(subject, rdflib.term.RdfstarTriple): + subject = dictionary[subject] + elif isinstance(subject, rdflib.term.BNode): + + if subject in blanknode_dictionary: + subject = "["+"".join(blanknode_dictionary[subject])+"]" + else: + subject = "_:"+str(subject) + + + if (isinstance(object, rdflib.term.URIRef)): + + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif isinstance(object, rdflib.term.RdfstarTriple): + object = dictionary[object] + elif isinstance(object, rdflib.term.BNode): + if object in blanknode_dictionary: + object = "["+"".join(blanknode_dictionary[object])+"]" + else: + object = "_:"+str(object) + + if isinstance(predicate, rdflib.term.URIRef): + predicate = "<"+str(predicate)+">" + + + if ((not subjectexpandable) and (not objectexpandable)): + + + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + + if node not in dictionary: + + dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" + properties.append("<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>") + + else: + properties.append(dictionary[node]) + + return properties, collection_or_not, quoted_Bnode_or_not, dictionary + + # this loop is for updating the quoted 
triple dictionary and blank node dictionary + for g in self.contexts: + + for s,p,o in g.triples((None, None, None)): + + if (isinstance(s, rdflib.term.BNode) & (isinstance(o, rdflib.term.BNode) or isinstance(o, rdflib.term.RdfstarTriple) or isinstance(p, rdflib.term.BNode) or isinstance(p, rdflib.term.RdfstarTriple))): + pass + elif("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p or "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + pass + else: + subject = s + predicate = p + object = o + + properties = [] + collection_or_not = False + quoted_Bnode_or_not = False + if (isinstance(subject, rdflib.term.URIRef)): + + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + elif (isinstance(subject, rdflib.term.BNode) or isinstance(subject, rdflib.term.RdfstarTriple)): + thenode_id = str(subject) + + result_subject, ifcollection, ifquotedBnode, dictionary = update_dictionary_RdfstarTriple(subject,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) + + if (not len(result_subject) == 0): + if ifcollection == True: + result_subject.insert(0, "(") + result_subject.append(")") + elif subject in blanknode_dictionary: + subject = "["+"".join(blanknode_dictionary[subject])+"]" + elif ifquotedBnode: + pass + else: + + result_subject.insert(0, "[") + result_subject.append("]") + subject = "".join(result_subject) + + else: + + # else: + subject = "[]" + if subject == "[]": + + subject = " _:"+thenode_id + + + if (isinstance(object, rdflib.term.URIRef)): + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif (isinstance(object, rdflib.term.BNode) or isinstance(object, rdflib.term.RdfstarTriple)): + thenode_id = str(object) + result_object, ifcollection, ifquotedBnode, dictionary = update_dictionary_RdfstarTriple(object,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not, 
blanknode_dictionary) + + if (not len(result_object) == 0): + if ifcollection == True: + result_object.insert(0, "(") + result_object.append(")") + + elif object in blanknode_dictionary: + object = "["+"".join(blanknode_dictionary[object])+"]" + elif ifquotedBnode: + pass + else: + result_object.insert(0, "[") + result_object.append("]") + object = "".join(result_object) + else: + object = "[]" + if object == "[]": + + object = " _:"+thenode_id + + + if(isinstance(predicate, rdflib.term.URIRef)): + predicate = "<"+str(predicate)+">" + + # this loop is for serializing results + for g in self.contexts: + + for s,p,o in g.triples((None, None, None)): + + if s in blanknode_dictionary: + + re1 = False + re2 = False + if len(blanknode_dictionary[s]) < 4: + re2 = True + + else: + re2 = False + re1 = True + + if re1 or re2: + if (isinstance(s, rdflib.term.BNode) & (isinstance(o, rdflib.term.BNode) or isinstance(o, rdflib.term.RdfstarTriple) or isinstance(p, rdflib.term.BNode) or isinstance(p, rdflib.term.RdfstarTriple))): + pass + elif("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p or "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + pass + else: + + subject = s + predicate = p + object = o + + + properties = [] + collection_or_not = False + quoted_Bnode_or_not = False + + if (isinstance(subject, rdflib.term.URIRef)): + + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + elif (isinstance(subject, rdflib.term.BNode) or isinstance(subject, rdflib.term.RdfstarTriple)): + thenode_id = str(subject) + + result_subject, ifcollection, ifquotedBnode, d = expand_Bnode_and_RdfstarTriple(subject,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not) + + if (not len(result_subject) == 0): + if ifcollection == True: + result_subject.insert(0, "(") + result_subject.append(")") + + elif ifquotedBnode: + pass + else: + + result_subject.insert(0, "[") + result_subject.append("]") + 
subject = "".join(result_subject) + else: + if (subject in blanknode_dictionary): + if(len(blanknode_dictionary[subject])>2): + subject = "["+"".join(blanknode_dictionary[subject])+"]" + else: + subject = "[]" + else: + subject = "[]" + if subject == "[]": + + subject = " _:"+thenode_id + properties = [] + + + if (isinstance(object, rdflib.term.URIRef)): + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif (isinstance(object, rdflib.term.BNode) or isinstance(object, rdflib.term.RdfstarTriple)): + thenode_id = str(object) + result_object, ifcollection, ifquotedBnode, d = expand_Bnode_and_RdfstarTriple(object,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not) + + + if (not len(result_object) == 0): + if ifcollection == True: + result_object.insert(0, "(") + result_object.append(")") + + elif ifquotedBnode: + pass + else: + result_object.insert(0, "[") + result_object.append("]") + object = "".join(result_object) + else: + if (object in blanknode_dictionary): + if(len(blanknode_dictionary[object])>2): + object = "["+"".join(blanknode_dictionary[object])+"]" + else: + object = "[]" + else: + object = "[]" + + if object == "[]": + + object = " _:"+thenode_id + properties = [] + + if(isinstance(predicate, rdflib.term.URIRef)): + predicate = "<"+str(predicate)+">" + + output = subject+" "+predicate+" "+object+" ."+"\n" + if output is not None: + stream.write(output.encode()) + + + def _iri_or_bn(self, i_): + if isinstance(i_, URIRef): + return f"{i_}" + elif isinstance(i_, BNode): + return f"{i_.n3()}" + else: + return None + + def _context(self, context): + if self.graph_type == Graph: + return "" + if context.identifier == "urn:x-rdflib:default": + return "" + elif context is not None and self.default_context is not None: + if context.identifier == self.default_context.identifier: + return "" + return context.identifier diff --git a/rdflib/term.py b/rdflib/term.py index 
class RdfstarTriple(IdentifiedNode):
    """Identifier standing in for an RDF-star quoted triple.

    Based on JervenBolleman's example:
    https://github.com/RDFLib/rdflib/issues/955

    The node's string value is *hashvalue* (a digest identifying the quoted
    triple); the subject/predicate/object terms are attached afterwards via
    the ``set*`` methods.
    """

    # Class-level defaults; each instance overwrites these via the setters.
    _subject = None
    _predicate = None
    _object = None
    _hashvalue = None
    _hashvalueid = None

    # NOTE(review): the original declared ``__slots__ = ()`` while the
    # setters below assign instance attributes; on the slotted str-based
    # Identifier hierarchy that raises AttributeError, so no __slots__ here.

    def __new__(cls, hashvalue: Optional[str]) -> "RdfstarTriple":
        # Every quoted triple must carry its (unique) hash value.
        # Bug fix: the original only print()ed on None and then built a node
        # from None anyway; raise instead so the error cannot be missed.
        if hashvalue is None:
            raise ValueError(
                "every quoted triple should have the only hash value and not None"
            )
        inst = Identifier.__new__(cls, hashvalue)
        # Bug fix: the original wrote ``self._hashvalue = hashvalue`` inside
        # __new__, where the first argument is actually the *class*, thereby
        # clobbering the shared class attribute instead of tagging the
        # new instance.
        inst._hashvalue = hashvalue
        return inst

    def n3(self, namespace_manager=None):
        # NOTE(review): appends "rdfstartriple" after the formatted label —
        # looks intentional as a marker suffix, but confirm; kept as-is.
        return "_:%s" % self + "rdfstartriple"

    def __repr__(self) -> str:
        if self.__class__ is RdfstarTriple:
            clsName = "rdflib.term.RdfstarTriple"
        else:
            clsName = self.__class__.__name__
        return """%s('%s')""" % (clsName, str(self))

    def subject(self):
        """Return the quoted triple's subject term (None until set)."""
        return self._subject

    def predicate(self):
        """Return the quoted triple's predicate term (None until set)."""
        return self._predicate

    def object(self):
        """Return the quoted triple's object term (None until set)."""
        return self._object

    def hashvalueid(self):
        """Return the statement-identifier hash set via :meth:`sid`."""
        return self._hashvalueid

    def setSubject(self, subject):
        self._subject = subject

    def setPredicate(self, predicate):
        self._predicate = predicate

    def setObject(self, object):
        self._object = object

    def sid(self, hashvalueid):
        """Record *hashvalueid* as this node's statement identifier."""
        self._hashvalueid = hashvalueid
diff --git a/test/ntriples-star/ntriples-star-bad-syntax-4.nt b/test/ntriples-star/ntriples-star-bad-syntax-4.nt new file mode 100644 index 000000000..af41d2064 --- /dev/null +++ b/test/ntriples-star/ntriples-star-bad-syntax-4.nt @@ -0,0 +1 @@ +<< _:label >> . diff --git a/test/ntriples-star/ntriples-star-bnode-1.nt b/test/ntriples-star/ntriples-star-bnode-1.nt new file mode 100644 index 000000000..2d94448ce --- /dev/null +++ b/test/ntriples-star/ntriples-star-bnode-1.nt @@ -0,0 +1,2 @@ +_:b0 . +<< _:b0 >> "ABC" . diff --git a/test/ntriples-star/ntriples-star-bnode-2.nt b/test/ntriples-star/ntriples-star-bnode-2.nt new file mode 100644 index 000000000..2dc337a93 --- /dev/null +++ b/test/ntriples-star/ntriples-star-bnode-2.nt @@ -0,0 +1,2 @@ + _:b1 . +<< _:b1 >> "456"^^ . diff --git a/test/ntriples-star/ntriples-star-nested-1.nt b/test/ntriples-star/ntriples-star-nested-1.nt new file mode 100644 index 000000000..d6a50cbee --- /dev/null +++ b/test/ntriples-star/ntriples-star-nested-1.nt @@ -0,0 +1,3 @@ + . +<< >> . +<< << >> >> "1"^^ . diff --git a/test/ntriples-star/ntriples-star-nested-2.nt b/test/ntriples-star/ntriples-star-nested-2.nt new file mode 100644 index 000000000..7a8d3131b --- /dev/null +++ b/test/ntriples-star/ntriples-star-nested-2.nt @@ -0,0 +1,3 @@ + . + << >> . +<< << >> >> . diff --git a/test/ntriples-star/ntriples-star-syntax-1.nt b/test/ntriples-star/ntriples-star-syntax-1.nt new file mode 100644 index 000000000..7f2be99c3 --- /dev/null +++ b/test/ntriples-star/ntriples-star-syntax-1.nt @@ -0,0 +1 @@ +<< >> . diff --git a/test/ntriples-star/ntriples-star-syntax-2.nt b/test/ntriples-star/ntriples-star-syntax-2.nt new file mode 100644 index 000000000..5aa2cb200 --- /dev/null +++ b/test/ntriples-star/ntriples-star-syntax-2.nt @@ -0,0 +1 @@ + << >> . 
import pytest

from rdflib import Graph
from rdflib.parser import Parser
from rdflib.plugin import register

register(
    "ttls",
    Parser,
    "rdflib.plugins.parsers.turtlestar",
    "TurtleParser",
)


# Positive-syntax test: parsing must succeed and return the Graph.
def test_TurtlePositiveSyntax_subject():
    """Parser accepts a quoted triple in subject position.

    Bug fixes vs. the original:
    * ``isinstance`` was called with a single (tuple) argument —
      ``isinstance((g.parse(...), Graph))`` — which raises TypeError and
      asserts nothing;
    * the file path was passed via the ``data=`` keyword, which makes
      rdflib parse the path *string itself* as Turtle instead of reading
      the file.
    """
    g = Graph()
    assert isinstance(
        g.parse("turtle-star/turtle-star-syntax-basic-01.ttl", format="ttls"),
        Graph,
    )


# NOTE(review): ~40 commented-out tests duplicating test_parser_turtlestar.py
# were removed as dead code; the live copies reside in that module.

if __name__ == "__main__":
    pytest.main()
diff --git a/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-2.ttl b/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-2.ttl new file mode 100644 index 000000000..1e47b3600 --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-2.ttl @@ -0,0 +1 @@ +<< "XYZ" >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-3.ttl b/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-3.ttl new file mode 100644 index 000000000..eaeb6f2a5 --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-3.ttl @@ -0,0 +1 @@ +<< "XYZ" >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-4.ttl b/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-4.ttl new file mode 100644 index 000000000..af41d2064 --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-4.ttl @@ -0,0 +1 @@ +<< _:label >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-bnode-1.ttl b/test/rdf-star/turtle-star/nt-ttl-star-bnode-1.ttl new file mode 100644 index 000000000..2d94448ce --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-bnode-1.ttl @@ -0,0 +1,2 @@ +_:b0 . +<< _:b0 >> "ABC" . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-bnode-2.ttl b/test/rdf-star/turtle-star/nt-ttl-star-bnode-2.ttl new file mode 100644 index 000000000..2dc337a93 --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-bnode-2.ttl @@ -0,0 +1,2 @@ + _:b1 . +<< _:b1 >> "456"^^ . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-nested-1.ttl b/test/rdf-star/turtle-star/nt-ttl-star-nested-1.ttl new file mode 100644 index 000000000..d6a50cbee --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-nested-1.ttl @@ -0,0 +1,3 @@ + . +<< >> . +<< << >> >> "1"^^ . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-nested-2.ttl b/test/rdf-star/turtle-star/nt-ttl-star-nested-2.ttl new file mode 100644 index 000000000..7a8d3131b --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-nested-2.ttl @@ -0,0 +1,3 @@ + . + << >> . +<< << >> >> . 
diff --git a/test/rdf-star/turtle-star/nt-ttl-star-syntax-1.ttl b/test/rdf-star/turtle-star/nt-ttl-star-syntax-1.ttl new file mode 100644 index 000000000..7f2be99c3 --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-syntax-1.ttl @@ -0,0 +1 @@ +<< >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-syntax-2.ttl b/test/rdf-star/turtle-star/nt-ttl-star-syntax-2.ttl new file mode 100644 index 000000000..5aa2cb200 --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-syntax-2.ttl @@ -0,0 +1 @@ + << >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-syntax-3.ttl b/test/rdf-star/turtle-star/nt-ttl-star-syntax-3.ttl new file mode 100644 index 000000000..16b9420af --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-syntax-3.ttl @@ -0,0 +1 @@ +<< >> << >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-syntax-4.ttl b/test/rdf-star/turtle-star/nt-ttl-star-syntax-4.ttl new file mode 100644 index 000000000..6dc18c359 --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-syntax-4.ttl @@ -0,0 +1 @@ +<<>><<>>. diff --git a/test/rdf-star/turtle-star/nt-ttl-star-syntax-5.ttl b/test/rdf-star/turtle-star/nt-ttl-star-syntax-5.ttl new file mode 100644 index 000000000..041b85739 --- /dev/null +++ b/test/rdf-star/turtle-star/nt-ttl-star-syntax-5.ttl @@ -0,0 +1 @@ +<<<<>><<>>>><<<<>><<>>>>. \ No newline at end of file diff --git a/test/rdf-star/turtle-star/turtle-star-annotation-1.ttl b/test/rdf-star/turtle-star/turtle-star-annotation-1.ttl new file mode 100644 index 000000000..fdba9df1d --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-annotation-1.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:s :p :o {| :r :z |} . 
diff --git a/test/rdf-star/turtle-star/turtle-star-annotation-2.ttl b/test/rdf-star/turtle-star/turtle-star-annotation-2.ttl new file mode 100644 index 000000000..06ef8eaf4 --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-annotation-2.ttl @@ -0,0 +1,10 @@ +PREFIX : +PREFIX xsd: + +:s :p :o {| :source [ :graph ; + :date "2020-01-20"^^xsd:date + ] ; + :source [ :graph ; + :date "2020-12-31"^^xsd:date + ] + |} . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-01.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-01.ttl new file mode 100644 index 000000000..4a41e436c --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bad-01.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o . +:x <<:s :p :o>> 123 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-02.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-02.ttl new file mode 100644 index 000000000..8a1fbb388 --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bad-02.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o . +<<:s :p :o>> . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-03.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-03.ttl new file mode 100644 index 000000000..3952b24b6 --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bad-03.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p ("abc") . +<<:s :p ("abc") >> :q 123 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-04.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-04.ttl new file mode 100644 index 000000000..a69b6469d --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bad-04.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o . +<<3 :p :o >> :q :z . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-05.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-05.ttl new file mode 100644 index 000000000..b1a45960a --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bad-05.ttl @@ -0,0 +1,3 @@ +PREFIX : + +<<:s [] :o>> :q 123 . 
diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-06.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-06.ttl new file mode 100644 index 000000000..687ef35c8 --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bad-06.ttl @@ -0,0 +1,4 @@ +PREFIX : + + +<<:s :p [ :p1 :o1 ] >> :q 123 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-07.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-07.ttl new file mode 100644 index 000000000..329e6d39b --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bad-07.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:s :p << :p :r >> . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-08.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-08.ttl new file mode 100644 index 000000000..72ceff2e2 --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bad-08.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:s :p << :g :s :p :o >> . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-1.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-1.ttl new file mode 100644 index 000000000..7f654f4eb --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-1.ttl @@ -0,0 +1,6 @@ +PREFIX : + +SELECT * { + :s :p :o {| |} . +} + diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-2.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-2.ttl new file mode 100644 index 000000000..1abed88fb --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-2.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:a :b :c {| :s :p :o |} . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-basic-01.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-basic-01.ttl new file mode 100644 index 000000000..3874f92ef --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-basic-01.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o . +<<:s :p :o>> :q 123 . 
diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-basic-02.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-basic-02.ttl new file mode 100644 index 000000000..a2b95e77d --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-basic-02.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o . +:x :p <<:s :p :o>> . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bnode-01.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bnode-01.ttl new file mode 100644 index 000000000..6591a66e1 --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bnode-01.ttl @@ -0,0 +1,4 @@ +PREFIX : + +_:a :p :o . +<<_:a :p :o >> :q 456 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bnode-02.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bnode-02.ttl new file mode 100644 index 000000000..ac99ad739 --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bnode-02.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p _:a . +<<:s :p _:a >> :q 456 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bnode-03.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bnode-03.ttl new file mode 100644 index 000000000..95f8cf5f2 --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-bnode-03.ttl @@ -0,0 +1,3 @@ +PREFIX : + +<<[] :p [] >> :q :z . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-compound.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-compound.ttl new file mode 100644 index 000000000..df83d9704 --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-compound.ttl @@ -0,0 +1,11 @@ +PREFIX : + + +:x :r :z . +:a :b :c . +<<:a :b :c>> :r :z . +<<:x :r :z >> :p <<:a :b :c>> . + +<< <<:x :r :z >> :p <<:a :b :c>> >> + :q +<< <<:x :r :z >> :p <<:a :b :c>> >> . 
diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-inside-01.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-inside-01.ttl new file mode 100644 index 000000000..ff87a146f --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-inside-01.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o . +[ :q <<:s :p :o>> ] :b :c . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-inside-02.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-inside-02.ttl new file mode 100644 index 000000000..a6f82e027 --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-inside-02.ttl @@ -0,0 +1,5 @@ +PREFIX : + +:s :p :o1 . +:s :p :o2 . +( <<:s :p :o1>> ( <<:s :p :o1>> <<:s :p :o2>> ) ) :q 123 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-nested-01.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-nested-01.ttl new file mode 100644 index 000000000..93a936cab --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-nested-01.ttl @@ -0,0 +1,7 @@ +PREFIX : + +:s :p :o . + +<<:s :p :o >> :r :z . + +<< <<:s :p :o >> :r :z >> :q 1 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-nested-02.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-nested-02.ttl new file mode 100644 index 000000000..aec49ef4a --- /dev/null +++ b/test/rdf-star/turtle-star/turtle-star-syntax-nested-02.ttl @@ -0,0 +1,5 @@ +PREFIX : + +:s :p :o . +:a :q <<:s :p :o >> . +<< :a :q <<:s :p :o >>>> :r :z . 
import pytest

from rdflib import Graph
from rdflib.parser import Parser
from rdflib.plugin import register

register(
    "ntstar",
    Parser,
    "rdflib.plugins.parsers.ntriples-star",
    "NtriplesStarParser",
)


# Positive-syntax tests: parsing must succeed and return the Graph.

def test_NtriplesPositiveSyntax_subject():
    g = Graph()
    # Bug fix: the original passed the path via ``data=``, which makes
    # rdflib parse the path string itself as N-Triples content instead of
    # reading the file.
    assert isinstance(g.parse("ntriples-star/ntriples-star-syntax-1.nt", format="ntstar"), Graph)


def test_NtriplesPositiveSyntax_object():
    g = Graph()
    assert isinstance(g.parse("ntriples-star/ntriples-star-syntax-2.nt", format="ntstar"), Graph)


def test_NtriplesPositiveSyntax_quotedtripleinsideblankNodePropertyList():
    g = Graph()
    assert isinstance(g.parse("ntriples-star/ntriples-star-syntax-3.nt", format="ntstar"), Graph)


def test_NtriplesPositiveSyntax_quotedtripleinsidecollection():
    g = Graph()
    assert isinstance(g.parse("ntriples-star/ntriples-star-syntax-4.nt", format="ntstar"), Graph)


def test_NtriplesPositiveSyntax_nestedquotedtriplesubjectposition():
    g = Graph()
    assert isinstance(g.parse("ntriples-star/ntriples-star-syntax-5.nt", format="ntstar"), Graph)


def test_NtriplesPositiveSyntax_nestedquotedtripleobjectposition():
    g = Graph()
    assert isinstance(g.parse("ntriples-star/ntriples-star-bnode-1.nt", format="ntstar"), Graph)
    print(g.serialize())


def test_NtriplesPositiveSyntax_compoundforms():
    g = Graph()
    assert isinstance(g.parse("ntriples-star/ntriples-star-bnode-2.nt", format="ntstar"), Graph)


def test_NtriplesPositiveSyntax_blanknodesubject():
    g = Graph()
    assert isinstance(g.parse("ntriples-star/ntriples-star-nested-1.nt", format="ntstar"), Graph)


def test_NtriplesPositiveSyntax_blanknodeobject():
    g = Graph()
    assert isinstance(g.parse("ntriples-star/ntriples-star-nested-2.nt", format="ntstar"), Graph)


# Negative-syntax tests: parsing must raise.
# Bug fix: the original wrapped the parse in try/except and called
# ``pytest.xfail`` only on failure, so each test silently PASSED whenever the
# parser wrongly *accepted* the bad syntax.  ``pytest.raises`` asserts that
# the failure actually occurs.

def test_NtriplesNegativeSyntax_Badquotedtripleaspredicate():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("ntriples-star/ntriples-star-bad-syntax-1.nt", format="ntstar")


def test_NtriplesNegativeSyntax_Badquotedtripleliteralsubject():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("ntriples-star/ntriples-star-bad-syntax-2.nt", format="ntstar")


def test_NtriplesNegativeSyntax_Badquotedtripleliteralpredicate():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("ntriples-star/ntriples-star-bad-syntax-3.nt", format="ntstar")


def test_NtriplesNegativeSyntax_Badquotedtripleblanknodepredicate():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("ntriples-star/ntriples-star-bad-syntax-4.nt", format="ntstar")


if __name__ == "__main__":
    pytest.main()
# Positive-syntax tests: parsing must succeed and return the Graph.

def test_TrigPositiveSyntax_basic_01():
    g = Graph()
    # Bug fixes: this test shared its name with the next one, so the second
    # ``def`` shadowed it and basic-01 was never executed; it also passed
    # the path via ``data=``, which parses the path string itself as TriG.
    assert isinstance(g.parse("trig-star/trig-star-syntax-basic-01.trig", format="trigs"), Graph)


def test_TrigPositiveSyntax_basic_02():
    g = Graph()
    assert isinstance(g.parse("trig-star/trig-star-syntax-basic-02.trig", format="trigs"), Graph)


def test_TrigPositiveSyntax_quotedtripleinsideblankNodePropertyList():
    g = Graph()
    assert isinstance(g.parse("trig-star/trig-star-syntax-inside-01.trig", format="trigs"), Graph)


def test_TrigPositiveSyntax_quotedtripleinsidecollection():
    g = Graph()
    assert isinstance(g.parse("trig-star/trig-star-syntax-inside-02.trig", format="trigs"), Graph)


def test_TrigPositiveSyntax_nestedquotedtriplesubjectposition():
    g = Graph()
    assert isinstance(g.parse("trig-star/trig-star-syntax-nested-01.trig", format="trigs"), Graph)


def test_TrigPositiveSyntax_nestedquotedtripleobjectposition():
    g = Graph()
    assert isinstance(g.parse("trig-star/trig-star-syntax-nested-02.trig", format="trigs"), Graph)
    print(g.serialize())


def test_TrigPositiveSyntax_compoundforms():
    g = Graph()
    assert isinstance(g.parse("trig-star/trig-star-syntax-compound.trig", format="trigs"), Graph)


def test_TrigPositiveSyntax_blanknodesubject():
    g = Graph()
    assert isinstance(g.parse("trig-star/trig-star-syntax-bnode-01.trig", format="trigs"), Graph)


def test_TrigPositiveSyntax_blanknodeobject():
    g = Graph()
    assert isinstance(g.parse("trig-star/trig-star-syntax-bnode-02.trig", format="trigs"), Graph)


def test_TrigPositiveSyntax_blanknode():
    g = Graph()
    assert isinstance(g.parse("trig-star/trig-star-syntax-bnode-03.trig", format="trigs"), Graph)


def test_TrigPositiveSyntax_Annotationform():
    g = Graph()
    assert isinstance(g.parse("trig-star/trig-star-annotation-1.trig", format="trigs"), Graph)


def test_TrigPositiveSyntax_Annotationexample():
    g = Graph()
    assert isinstance(g.parse("trig-star/trig-star-annotation-2.trig", format="trigs"), Graph)


# Negative-syntax tests: parsing must raise.
# Bug fixes: the originals used try/except + pytest.xfail, which silently
# passed whenever the parser wrongly accepted the bad syntax; several were
# also misnamed ``Positive`` although they exercise bad files.

def test_TrigNegativeSyntax_badquotedtripleaspredicate():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("trig-star/trig-star-syntax-bad-01.trig", format="trigs")


def test_TrigNegativeSyntax_badquotedtripleoutsidetriple():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("trig-star/trig-star-syntax-bad-02.trig", format="trigs")


def test_TrigNegativeSyntax_collectionlistinquotedtriple():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("trig-star/trig-star-syntax-bad-03.trig", format="trigs")


def test_TrigNegativeSyntax_badliteralinsubjectpositionofquotedtriple():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("trig-star/trig-star-syntax-bad-04.trig", format="trigs")


def test_TrigNegativeSyntax_blanknodeaspredicateinquotedtriple():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("trig-star/trig-star-syntax-bad-05.trig", format="trigs")


def test_TrigNegativeSyntax_compoundblanknodeexpression():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("trig-star/trig-star-syntax-bad-06.trig", format="trigs")


def test_TrigNegativeSyntax_incompletequotetriple():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("trig-star/trig-star-syntax-bad-07.trig", format="trigs")


def test_TrigNegativeSyntax_overlongquotedtriple():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("trig-star/trig-star-syntax-bad-08.trig", format="trigs")


def test_TrigNegativeSyntax_emptyannotation():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("trig-star/trig-star-syntax-bad-ann-1.trig", format="trigs")


def test_TrigNegativeSyntax_tripleasannotation():
    g = Graph()
    with pytest.raises(Exception):
        g.parse("trig-star/trig-star-syntax-bad-ann-2.trig", format="trigs")


if __name__ == "__main__":
    pytest.main()
test_TurtlePositiveSyntax_nestedquotedtriplesubjectposition(): + g = Graph() + assert isinstance(g.parse("turtle-star/turtle-star-syntax-nested-01.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_nestedquotedtripleobjectposition(): + g = Graph() + assert isinstance(g.parse("turtle-star/turtle-star-syntax-nested-02.ttl", format = "ttls"), Graph) + print(g.serialize()) + # for s, p, o, g in g.quads((None, RDF.type, None, None)): + # print(s) + +def test_TurtlePositiveSyntax_compoundforms(): + g = Graph() + assert isinstance(g.parse("turtle-star/turtle-star-syntax-compound.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_blanknodesubject(): + g = Graph() + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bnode-01.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_blanknodeobject(): + g = Graph() + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bnode-02.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_blanknode(): + g = Graph() + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bnode-03.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_Annotationform(): + g = Graph() + assert isinstance(g.parse("turtle-star/turtle-star-annotation-1.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_Annotationexample(): + g = Graph() + assert isinstance(g.parse("turtle-star/turtle-star-annotation-2.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_subjectquotedtriple(): + g = Graph() + assert isinstance(g.parse("turtle-star/nt-ttl-star-syntax-1.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_objectquotedtriple(): + g = Graph() + assert isinstance(g.parse("turtle-star/nt-ttl-star-syntax-2.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_subjectandobjectquotedtriples(): + g = Graph() + assert isinstance(g.parse("turtle-star/nt-ttl-star-syntax-3.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_whitespaceandterms(): + g 
= Graph() + assert isinstance(g.parse("turtle-star/nt-ttl-star-syntax-4.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_Nestednowhitespace(): + g = Graph() + assert isinstance(g.parse("turtle-star/nt-ttl-star-syntax-5.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_Blanknodesubject(): + g = Graph() + assert isinstance(g.parse("turtle-star/nt-ttl-star-bnode-1.ttl", format = "ttls"), Graph) + +def test_TurtlePositiveSyntax_Blanknodeobject(): + g = Graph() + assert isinstance(g.parse("turtle-star/nt-ttl-star-bnode-2.ttl", format = "ttls"), Graph) + +################################## +def test_TurtlePositiveSyntax_Nestedsubjectterm1(): + g = Graph() + assert isinstance(g.parse("turtle-star/nt-ttl-star-nested-1.ttl", format = "ttls"), Graph) + print(g.serialize()) + +def test_TurtlePositiveSyntax_Nestedsubjectterm2(): + g = Graph() + assert isinstance(g.parse("turtle-star/nt-ttl-star-nested-2.ttl", format = "ttls"), Graph) + +# tests should be broken + +def test_TurtleNegativeSyntax_Badquotedtripleaspredicate(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/nt-ttl-star-bad-syntax-1.ttl", format = "ttls"), Graph) + except: + pytest.xfail("Bad quoted triple literal subject") + +def test_TurtleNegativeSyntax_Badquotedtripleliteralsubject(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/nt-ttl-star-bad-syntax-2.ttl", format = "ttls"), Graph) + except: + pytest.xfail("Bad quoted triple literal subject") + +def test_TurtleNegativeSyntax_Badquotedtripleliteralpredicate(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/nt-ttl-star-bad-syntax-3.ttl", format = "ttls"), Graph) + except: + pytest.xfail("Badquotedtripleliteralpredicate") + +def test_TurtleNegativeSyntax_Badquotedtripleblanknodepredicate(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/nt-ttl-star-bad-syntax-4.ttl", format = "ttls"), Graph) + except: + pytest.xfail("Badquotedtripleblanknodepredicate") + +def 
test_TurtleNegativeSyntax_badquotedtripleaspredicate(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bad-01.ttl", format = "ttls"), Graph) + except: + pytest.xfail("Badquotedtripleblanknodepredicate") + +def test_TurtleNegativeSyntax_badquotedtripleoutsidetriple(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bad-02.ttl", format = "ttls"), Graph) + except: + pytest.xfail("badquotedtripleoutsidetriple") + +def test_TurtleNegativeSyntax_collectionlistinquotedtriple(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bad-03.ttl", format = "ttls"), Graph) + except: + pytest.xfail("collectionlistinquotedtriple") + +def test_TurtleNegativeSyntax_badliteralinsubjectpositionofquotedtriple(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bad-04.ttl", format = "ttls"), Graph) + except: + pytest.xfail("badliteralinsubjectpositionofquotedtriple") + +def test_TurtleNegativeSyntax_blanknodeaspredicateinquotedtriple(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bad-05.ttl", format = "ttls"), Graph) + except: + pytest.xfail("blanknodeaspredicateinquotedtriple") + +def test_TurtlePositiveSyntax_compoundblanknodeexpression(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bad-06.ttl", format = "ttls"), Graph) + except: + pytest.xfail("compoundblanknodeexpression") + +def test_TurtlePositiveSyntax_ncompletequotetriple(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bad-07.ttl", format = "ttls"), Graph) + except: + pytest.xfail("ncompletequotetriple") + +def test_TurtlePositiveSyntax_overlongquotedtriple(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bad-08.ttl", format = "ttls"), Graph) + except: + pytest.xfail("overlongquotedtriple") + +def test_TurtlePositiveSyntax_emptyannotation(): + g = Graph() + 
try: + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bad-ann-1.ttl", format = "ttls"), Graph) + except: + pytest.xfail("emptyannotation") + +def test_TurtlePositiveSyntax_tripleasannotation(): + g = Graph() + try: + assert isinstance(g.parse("turtle-star/turtle-star-syntax-bad-ann-2.ttl", format = "ttls"), Graph) + except: + pytest.xfail("tripleasannotation") + +if __name__ == "__main__": + pytest.main() diff --git a/test/trig-star/trig-star-annotation-1.trig b/test/trig-star/trig-star-annotation-1.trig new file mode 100644 index 000000000..888c85193 --- /dev/null +++ b/test/trig-star/trig-star-annotation-1.trig @@ -0,0 +1,3 @@ +PREFIX : + +:G {:s :p :o {| :r :z |} } diff --git a/test/trig-star/trig-star-annotation-2.trig b/test/trig-star/trig-star-annotation-2.trig new file mode 100644 index 000000000..0dc1e05fa --- /dev/null +++ b/test/trig-star/trig-star-annotation-2.trig @@ -0,0 +1,12 @@ +PREFIX : +PREFIX xsd: + +:G { + :s :p :o {| :source [ :graph ; + :date "2020-01-20"^^xsd:date + ] ; + :source [ :graph ; + :date "2020-12-31"^^xsd:date + ] + |} . +} diff --git a/test/trig-star/trig-star-syntax-bad-01.trig b/test/trig-star/trig-star-syntax-bad-01.trig new file mode 100644 index 000000000..6e718c72f --- /dev/null +++ b/test/trig-star/trig-star-syntax-bad-01.trig @@ -0,0 +1,6 @@ +PREFIX : + +:G { + :s :p :o . + :x <<:s :p :o>> 123 . +} diff --git a/test/trig-star/trig-star-syntax-bad-02.trig b/test/trig-star/trig-star-syntax-bad-02.trig new file mode 100644 index 000000000..e6514f378 --- /dev/null +++ b/test/trig-star/trig-star-syntax-bad-02.trig @@ -0,0 +1,6 @@ +PREFIX : + +:G { + :s :p :o . + <<:s :p :o>> . +} diff --git a/test/trig-star/trig-star-syntax-bad-03.trig b/test/trig-star/trig-star-syntax-bad-03.trig new file mode 100644 index 000000000..9dfe4b5b6 --- /dev/null +++ b/test/trig-star/trig-star-syntax-bad-03.trig @@ -0,0 +1,6 @@ +PREFIX : + +:G { + :s :p ("abc") . + <<:s :p ("abc") >> :q 123 . 
+} diff --git a/test/trig-star/trig-star-syntax-bad-04.trig b/test/trig-star/trig-star-syntax-bad-04.trig new file mode 100644 index 000000000..798456005 --- /dev/null +++ b/test/trig-star/trig-star-syntax-bad-04.trig @@ -0,0 +1,6 @@ +PREFIX : + +:G { + :s :p :o . + <<3 :p :o >> :q :z . +} diff --git a/test/trig-star/trig-star-syntax-bad-05.trig b/test/trig-star/trig-star-syntax-bad-05.trig new file mode 100644 index 000000000..8e78f14f8 --- /dev/null +++ b/test/trig-star/trig-star-syntax-bad-05.trig @@ -0,0 +1,5 @@ +PREFIX : + +:G { + <<:s [] :o>> :q 123 . +} diff --git a/test/trig-star/trig-star-syntax-bad-06.trig b/test/trig-star/trig-star-syntax-bad-06.trig new file mode 100644 index 000000000..6c8797ff1 --- /dev/null +++ b/test/trig-star/trig-star-syntax-bad-06.trig @@ -0,0 +1,5 @@ +PREFIX : + +:G { + <<:s :p [ :p1 :o1 ] >> :q 123 . +} diff --git a/test/trig-star/trig-star-syntax-bad-07.trig b/test/trig-star/trig-star-syntax-bad-07.trig new file mode 100644 index 000000000..d2a0e2c34 --- /dev/null +++ b/test/trig-star/trig-star-syntax-bad-07.trig @@ -0,0 +1,4 @@ +PREFIX : + + +:G {:s :p << :p :r >> .} diff --git a/test/trig-star/trig-star-syntax-bad-08.trig b/test/trig-star/trig-star-syntax-bad-08.trig new file mode 100644 index 000000000..03b67a315 --- /dev/null +++ b/test/trig-star/trig-star-syntax-bad-08.trig @@ -0,0 +1,3 @@ +PREFIX : + +:G {:s :p << :g :s :p :o >> .} diff --git a/test/trig-star/trig-star-syntax-bad-ann-1.trig b/test/trig-star/trig-star-syntax-bad-ann-1.trig new file mode 100644 index 000000000..04548514d --- /dev/null +++ b/test/trig-star/trig-star-syntax-bad-ann-1.trig @@ -0,0 +1,4 @@ +PREFIX : + +:G {:s :p :o {| |} .} + diff --git a/test/trig-star/trig-star-syntax-bad-ann-2.trig b/test/trig-star/trig-star-syntax-bad-ann-2.trig new file mode 100644 index 000000000..1d74c2c63 --- /dev/null +++ b/test/trig-star/trig-star-syntax-bad-ann-2.trig @@ -0,0 +1,3 @@ +PREFIX : + +:G {:a :b :c {| :s :p :o |} .} diff --git 
a/test/trig-star/trig-star-syntax-basic-01.trig b/test/trig-star/trig-star-syntax-basic-01.trig new file mode 100644 index 000000000..acbd98a3d --- /dev/null +++ b/test/trig-star/trig-star-syntax-basic-01.trig @@ -0,0 +1,6 @@ +PREFIX : + +:G { + :s :p :o . + <<:s :p :o>> :q 123 . +} diff --git a/test/trig-star/trig-star-syntax-basic-02.trig b/test/trig-star/trig-star-syntax-basic-02.trig new file mode 100644 index 000000000..f1033de44 --- /dev/null +++ b/test/trig-star/trig-star-syntax-basic-02.trig @@ -0,0 +1,6 @@ +PREFIX : + +:G { + :s :p :o . + :x :p <<:s :p :o>> . +} diff --git a/test/trig-star/trig-star-syntax-bnode-01.trig b/test/trig-star/trig-star-syntax-bnode-01.trig new file mode 100644 index 000000000..e6051df23 --- /dev/null +++ b/test/trig-star/trig-star-syntax-bnode-01.trig @@ -0,0 +1,6 @@ +PREFIX : + +:G { + _:a :p :o . + <<_:a :p :o >> :q 456 . +} diff --git a/test/trig-star/trig-star-syntax-bnode-02.trig b/test/trig-star/trig-star-syntax-bnode-02.trig new file mode 100644 index 000000000..8d5ddd246 --- /dev/null +++ b/test/trig-star/trig-star-syntax-bnode-02.trig @@ -0,0 +1,6 @@ +PREFIX : + +:G { + :s :p _:a . + <<:s :p _:a >> :q 456 . +} diff --git a/test/trig-star/trig-star-syntax-bnode-03.trig b/test/trig-star/trig-star-syntax-bnode-03.trig new file mode 100644 index 000000000..65df001f5 --- /dev/null +++ b/test/trig-star/trig-star-syntax-bnode-03.trig @@ -0,0 +1,3 @@ +PREFIX : + +:G {<<[] :p [] >> :q :z .} diff --git a/test/trig-star/trig-star-syntax-compound.trig b/test/trig-star/trig-star-syntax-compound.trig new file mode 100644 index 000000000..b6f1816f2 --- /dev/null +++ b/test/trig-star/trig-star-syntax-compound.trig @@ -0,0 +1,12 @@ +PREFIX : + +:G { + :x :r :z . + :a :b :c . + <<:a :b :c>> :r :z . + <<:x :r :z >> :p <<:a :b :c>> . + + << <<:x :r :z >> :p <<:a :b :c>> >> + :q + << <<:x :r :z >> :p <<:a :b :c>> >> . 
+} diff --git a/test/trig-star/trig-star-syntax-inside-01.trig b/test/trig-star/trig-star-syntax-inside-01.trig new file mode 100644 index 000000000..c88a87b6f --- /dev/null +++ b/test/trig-star/trig-star-syntax-inside-01.trig @@ -0,0 +1,6 @@ +PREFIX : + +:G { + :s :p :o . + [ :q <<:s :p :o>> ] :b :c . +} diff --git a/test/trig-star/trig-star-syntax-inside-02.trig b/test/trig-star/trig-star-syntax-inside-02.trig new file mode 100644 index 000000000..472548249 --- /dev/null +++ b/test/trig-star/trig-star-syntax-inside-02.trig @@ -0,0 +1,7 @@ +PREFIX : + +:G { + :s :p :o1 . + :s :p :o2 . + ( <<:s :p :o1>> <<:s :p :o2>> ) :q 123 . +} diff --git a/test/trig-star/trig-star-syntax-nested-01.trig b/test/trig-star/trig-star-syntax-nested-01.trig new file mode 100644 index 000000000..6b0e977e9 --- /dev/null +++ b/test/trig-star/trig-star-syntax-nested-01.trig @@ -0,0 +1,9 @@ +PREFIX : + +:G { + :s :p :o . + + <<:s :p :o >> :r :z . + + << <<:s :p :o >> :r :z >> :q 1 . +} diff --git a/test/trig-star/trig-star-syntax-nested-02.trig b/test/trig-star/trig-star-syntax-nested-02.trig new file mode 100644 index 000000000..2a79fea91 --- /dev/null +++ b/test/trig-star/trig-star-syntax-nested-02.trig @@ -0,0 +1,7 @@ +PREFIX : + +:G { + :s :p :o . + :a :q <<:s :p :o >> . + << :a :q <<:s :p :o >>>> :r :z . +} diff --git a/test/turtle-star/nt-ttl-star-bad-syntax-1.ttl b/test/turtle-star/nt-ttl-star-bad-syntax-1.ttl new file mode 100644 index 000000000..401f4b861 --- /dev/null +++ b/test/turtle-star/nt-ttl-star-bad-syntax-1.ttl @@ -0,0 +1 @@ + << >> . diff --git a/test/turtle-star/nt-ttl-star-bad-syntax-2.ttl b/test/turtle-star/nt-ttl-star-bad-syntax-2.ttl new file mode 100644 index 000000000..1e47b3600 --- /dev/null +++ b/test/turtle-star/nt-ttl-star-bad-syntax-2.ttl @@ -0,0 +1 @@ +<< "XYZ" >> . 
diff --git a/test/turtle-star/nt-ttl-star-bad-syntax-3.ttl b/test/turtle-star/nt-ttl-star-bad-syntax-3.ttl new file mode 100644 index 000000000..eaeb6f2a5 --- /dev/null +++ b/test/turtle-star/nt-ttl-star-bad-syntax-3.ttl @@ -0,0 +1 @@ +<< "XYZ" >> . diff --git a/test/turtle-star/nt-ttl-star-bad-syntax-4.ttl b/test/turtle-star/nt-ttl-star-bad-syntax-4.ttl new file mode 100644 index 000000000..af41d2064 --- /dev/null +++ b/test/turtle-star/nt-ttl-star-bad-syntax-4.ttl @@ -0,0 +1 @@ +<< _:label >> . diff --git a/test/turtle-star/nt-ttl-star-bnode-1.ttl b/test/turtle-star/nt-ttl-star-bnode-1.ttl new file mode 100644 index 000000000..2d94448ce --- /dev/null +++ b/test/turtle-star/nt-ttl-star-bnode-1.ttl @@ -0,0 +1,2 @@ +_:b0 . +<< _:b0 >> "ABC" . diff --git a/test/turtle-star/nt-ttl-star-bnode-2.ttl b/test/turtle-star/nt-ttl-star-bnode-2.ttl new file mode 100644 index 000000000..2dc337a93 --- /dev/null +++ b/test/turtle-star/nt-ttl-star-bnode-2.ttl @@ -0,0 +1,2 @@ + _:b1 . +<< _:b1 >> "456"^^ . diff --git a/test/turtle-star/nt-ttl-star-nested-1.ttl b/test/turtle-star/nt-ttl-star-nested-1.ttl new file mode 100644 index 000000000..d6a50cbee --- /dev/null +++ b/test/turtle-star/nt-ttl-star-nested-1.ttl @@ -0,0 +1,3 @@ + . +<< >> . +<< << >> >> "1"^^ . diff --git a/test/turtle-star/nt-ttl-star-nested-2.ttl b/test/turtle-star/nt-ttl-star-nested-2.ttl new file mode 100644 index 000000000..7a8d3131b --- /dev/null +++ b/test/turtle-star/nt-ttl-star-nested-2.ttl @@ -0,0 +1,3 @@ + . + << >> . +<< << >> >> . diff --git a/test/turtle-star/nt-ttl-star-syntax-1.ttl b/test/turtle-star/nt-ttl-star-syntax-1.ttl new file mode 100644 index 000000000..7f2be99c3 --- /dev/null +++ b/test/turtle-star/nt-ttl-star-syntax-1.ttl @@ -0,0 +1 @@ +<< >> . diff --git a/test/turtle-star/nt-ttl-star-syntax-2.ttl b/test/turtle-star/nt-ttl-star-syntax-2.ttl new file mode 100644 index 000000000..5aa2cb200 --- /dev/null +++ b/test/turtle-star/nt-ttl-star-syntax-2.ttl @@ -0,0 +1 @@ + << >> . 
diff --git a/test/turtle-star/nt-ttl-star-syntax-3.ttl b/test/turtle-star/nt-ttl-star-syntax-3.ttl new file mode 100644 index 000000000..16b9420af --- /dev/null +++ b/test/turtle-star/nt-ttl-star-syntax-3.ttl @@ -0,0 +1 @@ +<< >> << >> . diff --git a/test/turtle-star/nt-ttl-star-syntax-4.ttl b/test/turtle-star/nt-ttl-star-syntax-4.ttl new file mode 100644 index 000000000..6dc18c359 --- /dev/null +++ b/test/turtle-star/nt-ttl-star-syntax-4.ttl @@ -0,0 +1 @@ +<<>><<>>. diff --git a/test/turtle-star/nt-ttl-star-syntax-5.ttl b/test/turtle-star/nt-ttl-star-syntax-5.ttl new file mode 100644 index 000000000..041b85739 --- /dev/null +++ b/test/turtle-star/nt-ttl-star-syntax-5.ttl @@ -0,0 +1 @@ +<<<<>><<>>>><<<<>><<>>>>. \ No newline at end of file diff --git a/test/turtle-star/test1234.ttl b/test/turtle-star/test1234.ttl new file mode 100644 index 000000000..043216a94 --- /dev/null +++ b/test/turtle-star/test1234.ttl @@ -0,0 +1,23 @@ +PREFIX : +PREFIX rdf: +PREFIX rdfstar: + +_:1084179ead99530a25c26ba5310a0ac9 + a rdfstar:AssertedStatement, rdfstar:QuotedStatement ; + rdf:subject ; + rdf:predicate ; + rdf:object ; +. +_:4590a20d0206a2af368765c037de246e + a rdfstar:AssertedStatement, rdfstar:QuotedStatement ; + rdf:subject ; + rdf:predicate ; + rdf:object _:1084179ead99530a25c26ba5310a0ac9 ; +. +_:4590a20d0206a2af568765c037de246e + a rdfstar:AssertedStatement ; + rdf:subject _:4590a20d0206a2af368765c037de246e ; + rdf:predicate ; + rdf:object ; +. +[ [ ] ; ]. diff --git a/test/turtle-star/test_rdfliteral.ttl b/test/turtle-star/test_rdfliteral.ttl new file mode 100644 index 000000000..571987763 --- /dev/null +++ b/test/turtle-star/test_rdfliteral.ttl @@ -0,0 +1,5 @@ +@prefix : . +@prefix xsd: . + +:s :source [:graph ; :date "2020-01-20"^^xsd:date] . +:s :source [:graph ; :date "2020-12-31"^^xsd:date] . 
diff --git a/test/turtle-star/turtle-star-annotation-1.ttl b/test/turtle-star/turtle-star-annotation-1.ttl new file mode 100644 index 000000000..fdba9df1d --- /dev/null +++ b/test/turtle-star/turtle-star-annotation-1.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:s :p :o {| :r :z |} . diff --git a/test/turtle-star/turtle-star-annotation-2.ttl b/test/turtle-star/turtle-star-annotation-2.ttl new file mode 100644 index 000000000..06ef8eaf4 --- /dev/null +++ b/test/turtle-star/turtle-star-annotation-2.ttl @@ -0,0 +1,10 @@ +PREFIX : +PREFIX xsd: + +:s :p :o {| :source [ :graph ; + :date "2020-01-20"^^xsd:date + ] ; + :source [ :graph ; + :date "2020-12-31"^^xsd:date + ] + |} . diff --git a/test/turtle-star/turtle-star-syntax-bad-01.ttl b/test/turtle-star/turtle-star-syntax-bad-01.ttl new file mode 100644 index 000000000..316419f4a --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bad-01.ttl @@ -0,0 +1,14 @@ +PREFIX : + +:s :p :o . +:x <<:s :p :o>> 123 . + +g{ + :s :p :o . +} + +g2{ + :s :p :o . +} + +:g \ No newline at end of file diff --git a/test/turtle-star/turtle-star-syntax-bad-02.ttl b/test/turtle-star/turtle-star-syntax-bad-02.ttl new file mode 100644 index 000000000..8a1fbb388 --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bad-02.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o . +<<:s :p :o>> . diff --git a/test/turtle-star/turtle-star-syntax-bad-03.ttl b/test/turtle-star/turtle-star-syntax-bad-03.ttl new file mode 100644 index 000000000..3952b24b6 --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bad-03.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p ("abc") . +<<:s :p ("abc") >> :q 123 . diff --git a/test/turtle-star/turtle-star-syntax-bad-04.ttl b/test/turtle-star/turtle-star-syntax-bad-04.ttl new file mode 100644 index 000000000..a69b6469d --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bad-04.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o . +<<3 :p :o >> :q :z . 
diff --git a/test/turtle-star/turtle-star-syntax-bad-05.ttl b/test/turtle-star/turtle-star-syntax-bad-05.ttl new file mode 100644 index 000000000..b1a45960a --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bad-05.ttl @@ -0,0 +1,3 @@ +PREFIX : + +<<:s [] :o>> :q 123 . diff --git a/test/turtle-star/turtle-star-syntax-bad-06.ttl b/test/turtle-star/turtle-star-syntax-bad-06.ttl new file mode 100644 index 000000000..687ef35c8 --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bad-06.ttl @@ -0,0 +1,4 @@ +PREFIX : + + +<<:s :p [ :p1 :o1 ] >> :q 123 . diff --git a/test/turtle-star/turtle-star-syntax-bad-07.ttl b/test/turtle-star/turtle-star-syntax-bad-07.ttl new file mode 100644 index 000000000..329e6d39b --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bad-07.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:s :p << :p :r >> . diff --git a/test/turtle-star/turtle-star-syntax-bad-08.ttl b/test/turtle-star/turtle-star-syntax-bad-08.ttl new file mode 100644 index 000000000..72ceff2e2 --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bad-08.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:s :p << :g :s :p :o >> . diff --git a/test/turtle-star/turtle-star-syntax-bad-ann-1.ttl b/test/turtle-star/turtle-star-syntax-bad-ann-1.ttl new file mode 100644 index 000000000..7f654f4eb --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bad-ann-1.ttl @@ -0,0 +1,6 @@ +PREFIX : + +SELECT * { + :s :p :o {| |} . +} + diff --git a/test/turtle-star/turtle-star-syntax-bad-ann-2.ttl b/test/turtle-star/turtle-star-syntax-bad-ann-2.ttl new file mode 100644 index 000000000..1abed88fb --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bad-ann-2.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:a :b :c {| :s :p :o |} . diff --git a/test/turtle-star/turtle-star-syntax-basic-01.ttl b/test/turtle-star/turtle-star-syntax-basic-01.ttl new file mode 100644 index 000000000..3874f92ef --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-basic-01.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o . +<<:s :p :o>> :q 123 . 
diff --git a/test/turtle-star/turtle-star-syntax-basic-02.ttl b/test/turtle-star/turtle-star-syntax-basic-02.ttl new file mode 100644 index 000000000..a2b95e77d --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-basic-02.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o . +:x :p <<:s :p :o>> . diff --git a/test/turtle-star/turtle-star-syntax-bnode-01.ttl b/test/turtle-star/turtle-star-syntax-bnode-01.ttl new file mode 100644 index 000000000..6591a66e1 --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bnode-01.ttl @@ -0,0 +1,4 @@ +PREFIX : + +_:a :p :o . +<<_:a :p :o >> :q 456 . diff --git a/test/turtle-star/turtle-star-syntax-bnode-02.ttl b/test/turtle-star/turtle-star-syntax-bnode-02.ttl new file mode 100644 index 000000000..ac99ad739 --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bnode-02.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p _:a . +<<:s :p _:a >> :q 456 . diff --git a/test/turtle-star/turtle-star-syntax-bnode-03.ttl b/test/turtle-star/turtle-star-syntax-bnode-03.ttl new file mode 100644 index 000000000..95f8cf5f2 --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-bnode-03.ttl @@ -0,0 +1,3 @@ +PREFIX : + +<<[] :p [] >> :q :z . diff --git a/test/turtle-star/turtle-star-syntax-compound.ttl b/test/turtle-star/turtle-star-syntax-compound.ttl new file mode 100644 index 000000000..df83d9704 --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-compound.ttl @@ -0,0 +1,11 @@ +PREFIX : + + +:x :r :z . +:a :b :c . +<<:a :b :c>> :r :z . +<<:x :r :z >> :p <<:a :b :c>> . + +<< <<:x :r :z >> :p <<:a :b :c>> >> + :q +<< <<:x :r :z >> :p <<:a :b :c>> >> . diff --git a/test/turtle-star/turtle-star-syntax-inside-01.ttl b/test/turtle-star/turtle-star-syntax-inside-01.ttl new file mode 100644 index 000000000..cf8b122ba --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-inside-01.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o . +[ :q <<:s :p :o>>, <<:s :p :o>> ] :b :c . 
diff --git a/test/turtle-star/turtle-star-syntax-inside-02.ttl b/test/turtle-star/turtle-star-syntax-inside-02.ttl new file mode 100644 index 000000000..a6f82e027 --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-inside-02.ttl @@ -0,0 +1,5 @@ +PREFIX : + +:s :p :o1 . +:s :p :o2 . +( <<:s :p :o1>> ( <<:s :p :o1>> <<:s :p :o2>> ) ) :q 123 . diff --git a/test/turtle-star/turtle-star-syntax-nested-01.ttl b/test/turtle-star/turtle-star-syntax-nested-01.ttl new file mode 100644 index 000000000..93a936cab --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-nested-01.ttl @@ -0,0 +1,7 @@ +PREFIX : + +:s :p :o . + +<<:s :p :o >> :r :z . + +<< <<:s :p :o >> :r :z >> :q 1 . diff --git a/test/turtle-star/turtle-star-syntax-nested-02.ttl b/test/turtle-star/turtle-star-syntax-nested-02.ttl new file mode 100644 index 000000000..aec49ef4a --- /dev/null +++ b/test/turtle-star/turtle-star-syntax-nested-02.ttl @@ -0,0 +1,5 @@ +PREFIX : + +:s :p :o . +:a :q <<:s :p :o >> . +<< :a :q <<:s :p :o >>>> :r :z . 
diff --git a/test_serializer_ntriplesstar.py b/test_serializer_ntriplesstar.py new file mode 100644 index 000000000..3e08a42fe --- /dev/null +++ b/test_serializer_ntriplesstar.py @@ -0,0 +1,56 @@ +import pytest + +from pathlib import Path +from shutil import copyfile +from tempfile import TemporaryDirectory + +from rdflib.exceptions import ParserError + +from rdflib import Graph +from rdflib.util import guess_format + + +from rdflib.plugin import register +from rdflib.parser import Parser +from rdflib.serializer import Serializer + +import rdflib +from rdflib import URIRef +from rdflib.namespace import RDF +from rdflib.namespace import FOAF + +g = Graph() +g.parse(data="test/ntriples-star/ntriples-star-syntax-1.nt", format = "ntstar") +print(g.serialize(format = "ntstar")) + +g = Graph() +g.parse("test/ntriples-star/ntriples-star-syntax-2.nt", format = "ntstar") +print(g.serialize(format = "ntstar")) + +g = Graph() +g.parse("test/ntriples-star/ntriples-star-syntax-3.nt", format = "ntstar") +print(g.serialize(format = "ntstar")) + +g = Graph() +g.parse("test/ntriples-star/ntriples-star-syntax-4.nt", format = "ntstar") +print(g.serialize(format = "ntstar")) + +g = Graph() +g.parse("test/ntriples-star/ntriples-star-syntax-5.nt", format = "ntstar") +print(g.serialize(format = "ntstar")) + +g = Graph() +g.parse("test/ntriples-star/ntriples-star-bnode-1.nt", format = "ntstar") +print(g.serialize(format = "ntstar")) + +g = Graph() +g.parse("test/ntriples-star/ntriples-star-bnode-2.nt", format = "ntstar") +print(g.serialize(format = "ntstar")) + +g = Graph() +g.parse("test/ntriples-star/ntriples-star-nested-1.nt", format = "ntstar") +print(g.serialize(format = "ntstar")) + +g = Graph() +g.parse("test/ntriples-star/ntriples-star-nested-2.nt", format = "ntstar") +print(g.serialize(format = "ntstar")) diff --git a/test_serializer_trigstar.py b/test_serializer_trigstar.py new file mode 100644 index 000000000..d265bcc33 --- /dev/null +++ b/test_serializer_trigstar.py @@ -0,0 
+1,68 @@ +import pytest + +from pathlib import Path +from shutil import copyfile +from tempfile import TemporaryDirectory + +from rdflib.exceptions import ParserError + +from rdflib import Graph +from rdflib.util import guess_format + + +from rdflib.plugin import register +from rdflib.parser import Parser +from rdflib.serializer import Serializer + +import rdflib +from rdflib import URIRef +from rdflib.namespace import RDF +from rdflib.namespace import FOAF + +g = Graph() +g.parse(data="test/trig-star/trig-star-syntax-basic-01.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = Graph() +g.parse(data="test/trig-star/trig-star-syntax-basic-02.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = Graph() +g.parse(data="test/trig-star/trig-star-syntax-bnode-01.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = Graph() +g.parse(data="test/trig-star/trig-star-syntax-bnode-02.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = Graph() +g.parse(data="test/trig-star/trig-star-syntax-bnode-03.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = Graph() +g.parse(data="test/trig-star/trig-star-syntax-compound.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = Graph() +g.parse(data="test/trig-star/trig-star-syntax-inside-01.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = Graph() +g.parse(data="test/trig-star/trig-star-syntax-inside-02.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = Graph() +g.parse(data="test/trig-star/trig-star-syntax-nested-01.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = Graph() +g.parse(data="test/trig-star/trig-star-syntax-nested-02.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = Graph() +g.parse(data="test/trig-star/trig-star-annotation-1.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = Graph() 
+g.parse(data="test/trig-star/trig-star-annotation-2.trig", format = "trigs") +print(g.serialize(format = "trigstar")) diff --git a/test_serializer_turtlestar.py b/test_serializer_turtlestar.py new file mode 100644 index 000000000..f7d08533b --- /dev/null +++ b/test_serializer_turtlestar.py @@ -0,0 +1,104 @@ +import pytest + +from pathlib import Path +from shutil import copyfile +from tempfile import TemporaryDirectory + +from rdflib.exceptions import ParserError + +from rdflib import Graph +from rdflib.util import guess_format + + +from rdflib.plugin import register +from rdflib.parser import Parser +from rdflib.serializer import Serializer + +import rdflib +from rdflib import URIRef +from rdflib.namespace import RDF +from rdflib.namespace import FOAF + +g = Graph() +g.parse(data="test/turtle-star/turtle-star-syntax-basic-01.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-basic-02.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-inside-01.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-inside-02.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-nested-01.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-nested-02.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-compound.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-bnode-01.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-bnode-02.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() 
+g.parse("test/turtle-star/turtle-star-syntax-bnode-03.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/turtle-star-annotation-1.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/turtle-star-annotation-2.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-syntax-1.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-syntax-2.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-syntax-3.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-syntax-4.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-syntax-5.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-bnode-1.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-bnode-2.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-nested-1.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-nested-2.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) From 464726b3922a65a6d4282d2feb488e7820ae76a0 Mon Sep 17 00:00:00 2001 From: Xuguang Song Date: Sat, 17 Sep 2022 21:30:08 +1000 Subject: [PATCH 02/11] fixing nested annotation. 
now all evaluation/syntax tests past --- rdflib/plugins/parsers/turtlestar.py | 409 +++++++++++++++--- .../turtle-star-eval-01.ttl | 3 + .../turtle-star-eval-02.ttl | 3 + .../turtle-star-eval-annotation-1.ttl | 3 + .../turtle-star-eval-annotation-2.ttl | 10 + .../turtle-star-eval-annotation-3.ttl | 5 + .../turtle-star-eval-annotation-4.ttl | 3 + .../turtle-star-eval-annotation-5.ttl | 4 + .../turtle-star-eval-bnode-1.ttl | 4 + .../turtle-star-eval-bnode-2.ttl | 4 + .../turtle-star-eval-quoted-annotation-1.ttl | 3 + .../turtle-star-eval-quoted-annotation-2.ttl | 3 + .../turtle-star-eval-quoted-annotation-3.ttl | 3 + turtlestar-evaluationtest.py | 68 +++ 14 files changed, 468 insertions(+), 57 deletions(-) create mode 100644 test/turtlestar-evaluation/turtle-star-eval-01.ttl create mode 100644 test/turtlestar-evaluation/turtle-star-eval-02.ttl create mode 100644 test/turtlestar-evaluation/turtle-star-eval-annotation-1.ttl create mode 100644 test/turtlestar-evaluation/turtle-star-eval-annotation-2.ttl create mode 100644 test/turtlestar-evaluation/turtle-star-eval-annotation-3.ttl create mode 100644 test/turtlestar-evaluation/turtle-star-eval-annotation-4.ttl create mode 100644 test/turtlestar-evaluation/turtle-star-eval-annotation-5.ttl create mode 100644 test/turtlestar-evaluation/turtle-star-eval-bnode-1.ttl create mode 100644 test/turtlestar-evaluation/turtle-star-eval-bnode-2.ttl create mode 100644 test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-1.ttl create mode 100644 test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-2.ttl create mode 100644 test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-3.ttl create mode 100644 turtlestar-evaluationtest.py diff --git a/rdflib/plugins/parsers/turtlestar.py b/rdflib/plugins/parsers/turtlestar.py index 7501f8bda..f47daa5d2 100644 --- a/rdflib/plugins/parsers/turtlestar.py +++ b/rdflib/plugins/parsers/turtlestar.py @@ -260,26 +260,111 @@ def runNamespace(): Token, ) -# from pymantic.compat 
import ( -# binary_type, -# ) -# from pymantic.parsers.base import ( -# BaseParser, -# ) -# from pymantic.primitives import ( -# BlankNode, -# Literal, -# NamedNode, -# Triple, -# ) -# from pymantic.util import ( -# grouper, -# smart_urljoin, -# decode_literal, -# ) +from typing import List, Dict, Union, Callable, Iterable, Optional + +from lark import Lark +from lark.tree import Tree, ParseTree +from lark.visitors import Transformer_InPlace +from lark.lexer import Token, PatternStr, TerminalDef +from lark.grammar import Terminal, NonTerminal, Symbol + +from lark.tree_matcher import TreeMatcher, is_discarded_terminal +from lark.utils import is_id_continue + +def is_iter_empty(i): + try: + _ = next(i) + return False + except StopIteration: + return True + + +class WriteTokensTransformer(Transformer_InPlace): + "Inserts discarded tokens into their correct place, according to the rules of grammar" + + tokens: Dict[str, TerminalDef] + term_subs: Dict[str, Callable[[Symbol], str]] + + def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None: + self.tokens = tokens + self.term_subs = term_subs + + def __default__(self, data, children, meta): + if not getattr(meta, 'match_tree', False): + return Tree(data, children) + + iter_args = iter(children) + to_write = [] + for sym in meta.orig_expansion: + if is_discarded_terminal(sym): + try: + v = self.term_subs[sym.name](sym) + except KeyError: + t = self.tokens[sym.name] + if not isinstance(t.pattern, PatternStr): + raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t) + + v = t.pattern.value + to_write.append(v) + else: + x = next(iter_args) + if isinstance(x, list): + to_write += x + else: + if isinstance(x, Token): + assert Terminal(x.type) == sym, x + else: + assert NonTerminal(x.data) == sym, (sym, x) + to_write.append(x) + + assert is_iter_empty(iter_args) + return to_write + + +class Reconstructorv2(TreeMatcher): + """ + A Reconstructor that will, 
given a full parse Tree, generate source code. + Note: + The reconstructor cannot generate values from regexps. If you need to produce discarded + regexes, such as newlines, use `term_subs` and provide default values for them. + Paramters: + parser: a Lark instance + term_subs: a dictionary of [Terminal name as str] to [output text as str] + """ + + write_tokens: WriteTokensTransformer + + def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None: + TreeMatcher.__init__(self, parser) + + self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {}) + + def _reconstruct(self, tree): + unreduced_tree = self.match_tree(tree, tree.data) + + res = self.write_tokens.transform(unreduced_tree) + for item in res: + if isinstance(item, Tree): + # TODO use orig_expansion.rulename to support templates + yield from self._reconstruct(item) + else: + yield item + + def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str: + x = self._reconstruct(tree) + if postproc: + x = postproc(x) + y = [] + prev_item = '' + for item in x: + if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]): + y.append(' ') + y.append(item) + prev_item = item + return ' '.join(y) grammar = r"""turtle_doc: statement* -?statement: directive | triples "." | quotedtriples "." +?statement: directive | triples "." directive: prefix_id | base | sparql_prefix | sparql_base prefix_id: "@prefix" PNAME_NS IRIREF "." base: BASE_DIRECTIVE IRIREF "." @@ -288,9 +373,8 @@ def runNamespace(): triples: subject predicate_object_list | blank_node_property_list predicate_object_list? insidequotation: qtsubject verb qtobject -quotedtriples: triples compoundanno predicate_object_list: verb object_list (";" (verb object_list)?)* -?object_list: object ("," object)* +?object_list: object compoundanno? 
("," object compoundanno?)* ?verb: predicate | /a/ ?subject: iri | blank_node | collection | quotation ?predicate: iri @@ -353,6 +437,11 @@ def runNamespace(): turtle_lark = Lark(grammar, start="turtle_doc", parser="lalr", maybe_placeholders=False) +class Print_Tree(Visitor): + def print_quotation(self, tree): + assert tree.data == "quotation" + print(tree.children) + from lark import Visitor, v_args quotation_list = [] quotation_dict = dict() @@ -364,14 +453,105 @@ def runNamespace(): assertedtriplelist = [] quoted_or_not = False both_quoted_and_asserted = False +object_annotation_list = [] +annotation_s_p_o = [] +annotation_dict = dict() +to_remove = [] +output = "" def myHash(text:str): return str(hashlib.md5(text.encode('utf-8')).hexdigest()) +class Expandanotation(Visitor): + global annotation_s_p_o, to_remove + def __init__(self): + super().__init__() + self.variable_list = [] + + def triples(self, var): + + appends1 = [] + tri = Reconstructorv2(turtle_lark).reconstruct(var) + if "{|" in tri: + if len(var.children) == 2: + predicate_object_list2 = var.children[1].children + subject = Reconstructorv2(turtle_lark).reconstruct(var.children[0]) + po_list = [] + + for x in range(0, len(predicate_object_list2)): + + predicate_or_object = Reconstructorv2(turtle_lark).reconstruct(predicate_object_list2[x]) + po_list.append(predicate_or_object) + + if len(po_list) == 2: + if "," in po_list[1]: + po_lists = po_list[1].split(",") + + for y in po_lists: + + try: + object_annotation = y.split("{|",1) + o1 = object_annotation[0] + a1 = "{|"+object_annotation[1] + a1 = a1.strip() + a1_Dict = annotation_dict[a1] + spo_list = [subject,po_list[0],o1, a1_Dict] + + annotation_s_p_o.append(spo_list) + except: + spo_list = [subject,po_list[0],y] + annotation_s_p_o.append(spo_list) + else: + object_annotation = po_list[1].split("{|",1) + o1 = object_annotation[0] + a1 = "{|"+object_annotation[1] + a1_Dict = annotation_dict[a1] + spo_list = [subject, po_list[0], o1, a1_Dict] + 
annotation_s_p_o.append(spo_list) + po_list = [] + + to_remove.append(tri) + + + for x in var.children: + x1 = Reconstructorv2(turtle_lark).reconstruct(x) + + + + def compoundanno(self, var): + + appends1 = [] + tri2 = Reconstructorv2(turtle_lark).reconstruct(var) + + + for x in var.children[1].children: + + test = Reconstructorv2(turtle_lark).reconstruct(x) + + if "{|" in test: + test123 = test.split("{|",1) + + object = test123[0] + + test123.pop(0) + + test_annotation = "{|"+ "".join(test123) + result = annotation_dict[test_annotation] + + if not tri2 in annotation_dict: + annotation_dict[tri2] = [object,result] + else: + annotation_dict[tri2].append(object) + annotation_dict[tri2].append(result) + else: + if not tri2 in annotation_dict: + annotation_dict[tri2] = [test] + else: + annotation_dict[tri2].append(test) + class FindVariables(Visitor): def __init__(self): super().__init__() - # self.quotation_list = [] self.variable_list = [] def quotation(self, var): @@ -386,10 +566,11 @@ def quotation(self, var): quotation_dict[qut] = str(myHash(qut)) + "RdfstarTriple" qut_hash = ":" + str(myHash(qut)) - # try: + id = quotation_dict.get(vr) for x in quotation_dict: if x in vr: + vr = vr.replace(x, ":"+quotation_dict.get(x)) vr = vr.replace("<<", "") vr = vr.replace(">>", "") @@ -397,13 +578,14 @@ def quotation(self, var): output.pop(0) oa1 = Reconstructor(turtle_lark).reconstruct(var) oa1 = oa1.replace(";","") - # oa1 = oa1.replace(" ","") + output.append(oa1) - # print(quotationreif) + if (not (output in quotationreif)): quotationreif.append(output) def blank_node_property_list(self, var): + object_list = ((var.children[0]).children)[1].children for x in range(0, len(object_list)): @@ -416,6 +598,7 @@ def blank_node_property_list(self, var): object_list[x] = Tree('iri', [Tree('prefixed_name', [Token('PNAME_LN', hasht2)])]) except Exception as ex: + object_list = ((var.children[0]).children)[1] collection_quotation_reconstruct = 
Reconstructor(turtle_lark).reconstruct(object_list) collection_quotation_reconstruct = collection_quotation_reconstruct.replace(";","") @@ -430,48 +613,22 @@ def blank_node_property_list(self, var): def collection(self, var): for x in range(0, len(var.children)): if var.children[x].data == 'quotation': + collection_quotation_reconstruct = Reconstructor(turtle_lark).reconstruct(var.children[x]) collection_quotation_reconstruct = collection_quotation_reconstruct.replace(";","") t2 = quotation_dict[collection_quotation_reconstruct] hasht2 = "_:" + t2 var.children[x] = Tree('iri', [Tree('prefixed_name', [Token('PNAME_LN', hasht2)])]) - def quotedtriples(self, var): - triple1 = None - subjecthash = "" - - for x in var.children: - if x.data == "triples": - triple1 = Reconstructor(turtle_lark).reconstruct(x) - triple1 = triple1.replace(";","") - - triple1 = "<<"+triple1+">>" - subjecthash = "_:" + str(myHash(triple1)) + "RdfstarTriple" - if not (triple1 in quotation_list): - quotation_list.append(triple1) - - quotation_dict[triple1] = str(myHash(triple1)) + "RdfstarTriple" - elif x.data == "compoundanno": - for y in x.children: - if (y != "{|") & (y!= "|}"): - count2 = 0 - quotationtriple = [] - for z in y.children: - count2+=1 - z2 = Reconstructor(turtle_lark).reconstruct(z) - quotationtriple.append(z2) - if count2 ==2: - quotationtriple.insert(0, subjecthash) - quotationannolist.append(quotationtriple) - count2 = 0 - quotationtriple = [] - def triples(self, var): appends1 = [] tri = Reconstructor(turtle_lark).reconstruct(var) + if ("[" in tri) and (not "RdfstarTriple" in tri) and (not "<<" in tri): + vblist.append([tri]) + else: tri = tri.replace(";", "") if not (tri in assertedtriplelist): @@ -482,17 +639,22 @@ def triples(self, var): for y in xc: try: x2 = Reconstructor(turtle_lark).reconstruct(y) + except: + appends1.pop(0) + appends1.append("standard reification") appends1.append(Reconstructor(turtle_lark).reconstruct(var)) appends1.append(" . 
\n") break - x2 = x2.replace(";","") + appends1.append(x2) else: + anyquotationin = False x1 = Reconstructor(turtle_lark).reconstruct(x) + appends1.append(x1) if not (appends1 in vblist): @@ -503,6 +665,7 @@ def insidequotation(self, var): for x in var.children: x1 = Reconstructor(turtle_lark).reconstruct(x) x1 = x1.replace(";","") + appends1.append(x1) if not (appends1 in vblist): @@ -515,12 +678,15 @@ def sparql_prefix(self, children): prefix_list.append(children) def base(self, children): + # print("base") base_directive, base_iriref = children + # print("base", base_directive, base_iriref) + # Workaround for lalr parser token ambiguity in python 2.7 if base_directive.startswith('@') and base_directive != '@base': raise ValueError('Unexpected @base: ' + base_directive) def RDFstarParsings(rdfstarstring): - global quotationannolist, vblist, quotation_dict, quotationreif, prefix_list, constructors, assertedtriplelist, quoted_or_not, both_quoted_and_asserted + global quotationannolist, vblist, quotation_dict, quotationreif, prefix_list, constructors, assertedtriplelist, quoted_or_not, both_quoted_and_asserted, to_remove, annotation_s_p_o, output quotationannolist = [] vblist = [] quotationreif = [] @@ -528,18 +694,95 @@ def RDFstarParsings(rdfstarstring): constructors = "" quoted_or_not = False both_quoted_and_asserted = False + output = "" + output_tree = "" + annotation_s_p_o = [] + to_remove = [] + annotation_dict = dict() tree = turtle_lark.parse(rdfstarstring) + + tt = Expandanotation().visit(tree) + + tree_after = Reconstructorv2(turtle_lark).reconstruct(tree) + + splittree_after = tree_after.split(">") + + PREFIX_substitute = dict() + for x in splittree_after: + + if "PREFIX" in x: + y = x + ">"+" " + "\n" + PREFIX_substitute[x+">"] = y + + for z in PREFIX_substitute: + tree_after = tree_after.replace(z, "") + + for z in PREFIX_substitute: + tree_after = PREFIX_substitute[z] + tree_after + + for x in to_remove: + + x = x + " ." 
+ + tree_after = tree_after.replace(x, "") + tree_after = tree_after+ "\n" + if "PREFIX:" in tree_after: + tree_after = tree_after.replace("PREFIX:", "PREFIX :") + + def expand_to_rdfstar(x): + + global output + + spo = "<<"+x[0] +" "+x[1] + " " + x[2]+">>" + try: + if len(x[3]) == 2: + + output += spo + " "+ x[3][0] +" "+x[3][1] + "." + "\n" + + elif len(x[3]) == 3: + + output += spo + " "+ x[3][0] +" "+x[3][1] + "." + "\n" + + newspolist = [spo, x[3][0],x[3][1], x[3][2]] + + expand_to_rdfstar(newspolist) + else: + clist = [x[3][y:y+2] for y in range(0, len(x[3]),2)] + + + for z in clist: + + expand_to_rdfstar([x[0],x[1],x[2],z]) + except: + + pass + + output = "" + for x in annotation_s_p_o: + + output +=x[0] +" "+ x[1] +" "+ x[2] + "." + "\n" + expand_to_rdfstar(x) + + output_tree = tree_after+output + + tree = turtle_lark.parse(output_tree) + at = FindVariables().visit(tree) + for y in vblist: + for element_index in range(0, len(y)): if (y[element_index][0] == "_") & (not (element_index == 0)): y[element_index]=" "+y[element_index] result = "".join(y) + if "standard reification" in result: + result = result.replace("standard reification", "") constructors+=result else: result = result.replace(" ", "") + if result in assertedtriplelist: test1 = "<<"+result+">>" if test1 in quotation_list: @@ -556,12 +799,15 @@ def RDFstarParsings(rdfstarstring): both_quoted_and_asserted = False quoted_or_not = False result = "<<"+result+">>" + if not (result in quotation_list): for z in range(0,len(y)): if "<<" in y[z]: y[z] = y[z].replace(" ", "") + y[z] = "_:"+quotation_dict[y[z]] myvalue = str(myHash(result)) + try: subject = y[0] predicate = y[1] @@ -569,6 +815,7 @@ def RDFstarParsings(rdfstarstring): except: if len(y)==1: result2 = y[0] + constructors+=result2 constructors = constructors +".\n" continue @@ -579,6 +826,7 @@ def RDFstarParsings(rdfstarstring): next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject 
"+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" else: next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + constructors+=next_rdf_object else: value = quotation_dict[result] @@ -595,6 +843,7 @@ def RDFstarParsings(rdfstarstring): next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" else: next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + constructors+=next_rdf_object for z in quotationannolist: @@ -616,9 +865,11 @@ def RDFstarParsings(rdfstarstring): next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" else: next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + constructors+=next_rdf_object for x in range(0, len(prefix_list)): + prefix_list[x] = Reconstructor(turtle_lark).reconstruct(prefix_list[x]) constructors = prefix_list[x]+"\n"+constructors @@ -851,7 +1102,8 @@ def feed(self, octets: Union[str, bytes]): return i = self.directiveOrStatement(s, j) if i < 0: - # print("# next char: %s" % s) + # print("# next char: %s" % s[j-5:j+5]) + # print("asdadasd", i, j) self.BadSyntax(s, j, "expected directive or statement") def directiveOrStatement(self, argstr: str, h: int) -> int: @@ -1246,7 +1498,10 @@ def path(self, argstr: str, i: int, res): def anonymousNode(self, ln: str): """Remember or generate a term for one of these _: anonymous nodes""" + # 
print("anonymousNode", self._anonymousNodes.get(ln, None), self._context, self._reason2) if ("RdfstarTriple" in ln): + # print("new object") + # ln = ln.replace("RdfstarTriple", "") term = self._rdfstartripleNodes.get(ln, None) if term is not None: return term @@ -1314,6 +1569,7 @@ def node(self, argstr: str, i: int, res, subjectAlready=None): argstr, i, "EOF when ']' expected after [ " ) if argstr[j] != "]": + # print("asdadasd", argstr[j-1], argstr[j-2], argstr[j-3], argstr[j-4], argstr[j-5]) self.BadSyntax(argstr, j, "']' expected") res.append(subj) return j + 1 @@ -1465,6 +1721,7 @@ def addingquotedRdfstarTriple(self, quoted_triple_list, dira): self.makeStatement((self._context, quoted_triple_list[2], quoted_triple_list[3], quoted_triple_list[1])) else: if dira == "->": + # print("making statement") quoted_triple_list[1].setSubject(quoted_triple_list[2]) quoted_triple_list[1].setPredicate(quoted_triple_list[3]) quoted_triple_list[1].setObject(quoted_triple_list[4]) @@ -1496,6 +1753,7 @@ def property_list(self, argstr: str, i: int, subj): self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") i = j + 2 res: typing.List[Any] = [] + # print("node in propertylist", self.node(argstr, i, res, subj)) j = self.node(argstr, i, res, subj) if j < 0: self.BadSyntax(argstr, i, "bad {} or () or [] node after :- ") @@ -1510,12 +1768,23 @@ def property_list(self, argstr: str, i: int, subj): objs: typing.List[Any] = [] i = self.objectList(argstr, j, objs) + # print("objectList in propertylist", objs) if i < 0: self.BadSyntax(argstr, j, "objectList expected") + # assertedtriple = False + # quotedtriple = False + # assertedtriple_s_p_o = [] + # quotedtriple_hashnode_s_p_o = [] + # for obj in objs: + # dira, sym = v[0] + # print("test make statement objsssss", sym, subj, obj) + # if "RdfstarTriple" in subj: + for obj in objs: dira, sym = v[0] if "RdfstarTriple" in subj: + # print("asdasdasd", obj) if "rdf-star" in str(obj): if len(quoted_triple_list) > 2: quoted_triple_list = 
[] @@ -1523,9 +1792,11 @@ def property_list(self, argstr: str, i: int, subj): if (rdflib.term.URIRef('https://w3id.org/rdf-star/QuotedStatement') in quoted_triple_list) & (not (subj in quoted_triple_list)): quoted_triple_list.append(subj) if "#object" in sym: + # print("asdasdasd", quoted_triple_list) self.addingquotedRdfstarTriple(quoted_triple_list, dira) else: if dira == "->": + # print("tests ->", self._context, sym, subj, obj) self.makeStatement((self._context, sym, subj, obj)) else: self.makeStatement((self._context, sym, obj, subj)) @@ -1564,6 +1835,7 @@ def commaSeparatedList(self, argstr: str, j, res, what): self.BadSyntax(argstr, i, "bad list content") def objectList(self, argstr: str, i: int, res: typing.List[Any]) -> int: + # print("object in objectList") i = self.object(argstr, i, res) if i < 0: return -1 @@ -1607,7 +1879,9 @@ def uri_ref2(self, argstr: str, i: int, res): try: ns = self._bindings[pfx] except KeyError: + # print("testuri2", pfx, ln) if pfx == "_": # Magic prefix 2001/05/30, can be changed + # print("anonymousNode in uriref2") res.append(self.anonymousNode(ln)) return j if not self.turtle and pfx == "": @@ -1849,6 +2123,7 @@ def qname(self, argstr: str, i: int, res): return -1 def object(self, argstr: str, i: int, res): + # print("subject in object") j = self.subject(argstr, i, res) if j >= 0: return j @@ -1877,6 +2152,7 @@ def object(self, argstr: str, i: int, res): return -1 def nodeOrLiteral(self, argstr: str, i: int, res): + # print("node in nodeOrLiteral") j = self.node(argstr, i, res) startline = self.lines # Remember where for error messages if j >= 0: @@ -1937,6 +2213,7 @@ def nodeOrLiteral(self, argstr: str, i: int, res): j = i if argstr[j : j + 2] == "^^": res2: typing.List[Any] = [] + # print("nodeorLiteral") j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI dt = res2[0] res.append(self._store.newLiteral(s, dt, lang)) @@ -2149,6 +2426,7 @@ def id(self): return BNode("_:Formula%s" % self.number) def newBlankNode(self, 
uri=None, why=None): + # print("newBlankNode in Formula") if uri is None: self.counter += 1 bn = BNode("f%sb%s" % (self.uuid, self.counter)) @@ -2157,6 +2435,7 @@ def newBlankNode(self, uri=None, why=None): return bn def newRdfstarTriple(self, hashvalue, uri=None, why=None): + # print("newRdfstarTriple in Formula") if uri is None: # self.counter += 1 rdfstartriple = RdfstarTriple(hashvalue = hashvalue) @@ -2206,12 +2485,16 @@ def newBlankNode( uri: Optional[str] = None, why: Optional[Callable[[], None]] = None, ) -> BNode: + # print("newBlankNode in RDFSink") if isinstance(arg, Formula): + # print("newBlankNode in Formula", arg, uri) return arg.newBlankNode(uri) elif isinstance(arg, Graph) or arg is None: + # print("newBlankNode in RDFSink Graph", arg, uri, self.uuid, self.counter,"n%sb%s" % (self.uuid, self.counter)) self.counter += 1 bn = BNode("n%sb%s" % (self.uuid, self.counter)) else: + # print("testsv24", arg, uri, str(arg[0]).split("#").pop().replace("_", "b")) bn = BNode(str(arg[0]).split("#").pop().replace("_", "b")) return bn @@ -2225,12 +2508,17 @@ def newRdfstarTriple( why: Optional[Callable[[], None]] = None, hashvalue: Optional[str] = None ) -> RdfstarTriple: + # print("newRdflibRdfstartriple in Formula") if isinstance(arg, Formula): + # print("testsv2", arg, uri) return arg.newRdfstarTriple(hashvalue = hashvalue) elif isinstance(arg, Graph) or arg is None: + # print("newRdflibRdfstartriple", hashvalue) # self.counter += 1 rdfstartriple = RdfstarTriple(hashvalue =hashvalue) else: + # print("newRdflibRdfstartriple",hashvalue) + # print("testsv24", arg, uri, str(arg[0]).split("#").pop().replace("_", "rdfstartriple")) rdfstartriple = RdfstarTriple(hashvalue =hashvalue) return rdfstartriple @@ -2241,6 +2529,7 @@ def newLiteral(self, s: str, dt: Optional[URIRef], lang: Optional[str]) -> Liter return Literal(s, lang=lang) def newList(self, n: typing.List[Any], f: Optional[Formula]): + # print("testnewlist") nil = 
self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") if not n: return nil @@ -2265,6 +2554,7 @@ def setDefaultNamespace(self, *args) -> str: return ":".join(repr(n) for n in args) def makeStatement(self, quadruple, why=None) -> None: + # print("testmakeStatement", quadruple) f, p, s, o = quadruple if hasattr(p, "formula"): @@ -2277,6 +2567,7 @@ def makeStatement(self, quadruple, why=None) -> None: # print s, p, o, '.' self.graph.add((s, p, o)) elif isinstance(f, Formula): + # print("quotedgraph added") f.quotedgraph.add((s, p, o)) else: f.add((s, p, o)) @@ -2284,6 +2575,7 @@ def makeStatement(self, quadruple, why=None) -> None: # return str(quadruple) def makerdfstarStatement(self, quadruple, why=None) -> None: + # print("testmakeStatement", quadruple) f, hashnode, p, s, o = quadruple if hasattr(p, "formula"): @@ -2292,6 +2584,7 @@ def makerdfstarStatement(self, quadruple, why=None) -> None: s = self.normalise(f, s) p = self.normalise(f, p) o = self.normalise(f, o) + # print("testmakerdfstarStatement", hashnode, s,p,o) if f == self.rootFormula: # print s, p, o, '.' self.graph.addStarTriple((hashnode, s, p, o)) @@ -2405,6 +2698,7 @@ def parse( # stream = source.getByteStream() # p.loadStream(stream) + # print("tests", source) if hasattr(source, "file"): f = open(source.file.name, "rb") rdbytes = f.read() @@ -2417,6 +2711,7 @@ def parse( bp = rdbytes.decode("utf-8") ou = RDFstarParsings(bp) + # print(ou) p.feed(ou) p.endDoc() for prefix, namespace in p._bindings.items(): diff --git a/test/turtlestar-evaluation/turtle-star-eval-01.ttl b/test/turtlestar-evaluation/turtle-star-eval-01.ttl new file mode 100644 index 000000000..ad4940b9f --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-01.ttl @@ -0,0 +1,3 @@ +PREFIX : + +<<:s :p :o>> :q :z . 
diff --git a/test/turtlestar-evaluation/turtle-star-eval-02.ttl b/test/turtlestar-evaluation/turtle-star-eval-02.ttl new file mode 100644 index 000000000..6e76ac6d6 --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-02.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:a :q <<:s :p :o>> . diff --git a/test/turtlestar-evaluation/turtle-star-eval-annotation-1.ttl b/test/turtlestar-evaluation/turtle-star-eval-annotation-1.ttl new file mode 100644 index 000000000..fdba9df1d --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-annotation-1.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:s :p :o {| :r :z |} . diff --git a/test/turtlestar-evaluation/turtle-star-eval-annotation-2.ttl b/test/turtlestar-evaluation/turtle-star-eval-annotation-2.ttl new file mode 100644 index 000000000..06ef8eaf4 --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-annotation-2.ttl @@ -0,0 +1,10 @@ +PREFIX : +PREFIX xsd: + +:s :p :o {| :source [ :graph ; + :date "2020-01-20"^^xsd:date + ] ; + :source [ :graph ; + :date "2020-12-31"^^xsd:date + ] + |} . diff --git a/test/turtlestar-evaluation/turtle-star-eval-annotation-3.ttl b/test/turtlestar-evaluation/turtle-star-eval-annotation-3.ttl new file mode 100644 index 000000000..8b2bbd6a6 --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-annotation-3.ttl @@ -0,0 +1,5 @@ +PREFIX : + +:s :p :o {| :a :b |}; + :p2 :o2 {| :a2 :b2 |}, + :o3 {| :a3 :b3 |}. diff --git a/test/turtlestar-evaluation/turtle-star-eval-annotation-4.ttl b/test/turtlestar-evaluation/turtle-star-eval-annotation-4.ttl new file mode 100644 index 000000000..c07c701fe --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-annotation-4.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:s :p :o {| :a :b {| :a2 :b2 |} |}. 
diff --git a/test/turtlestar-evaluation/turtle-star-eval-annotation-5.ttl b/test/turtlestar-evaluation/turtle-star-eval-annotation-5.ttl new file mode 100644 index 000000000..751dc6793 --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-annotation-5.ttl @@ -0,0 +1,4 @@ +PREFIX : + +:s :p :o1, :o2 {| :a :b |} . + diff --git a/test/turtlestar-evaluation/turtle-star-eval-bnode-1.ttl b/test/turtlestar-evaluation/turtle-star-eval-bnode-1.ttl new file mode 100644 index 000000000..a2c01fffa --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-bnode-1.ttl @@ -0,0 +1,4 @@ +PREFIX : + +_:b :p :o . +<<_:b :p :o>> :q :z . diff --git a/test/turtlestar-evaluation/turtle-star-eval-bnode-2.ttl b/test/turtlestar-evaluation/turtle-star-eval-bnode-2.ttl new file mode 100644 index 000000000..8b2e5425b --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-bnode-2.ttl @@ -0,0 +1,4 @@ +PREFIX : + +_:a :p1 _:a . +<<_:a :p1 _:a >> :q <<_:a :p2 :o>> . diff --git a/test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-1.ttl b/test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-1.ttl new file mode 100644 index 000000000..5245264af --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-1.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:s :p :o {| :r <<:s1 :p1 :o1>> |} . diff --git a/test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-2.ttl b/test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-2.ttl new file mode 100644 index 000000000..e4f702148 --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-2.ttl @@ -0,0 +1,3 @@ +PREFIX : + +<<:s1 :p1 :o1>> :p :o {| :r :z |} . 
diff --git a/test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-3.ttl b/test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-3.ttl new file mode 100644 index 000000000..f19ee1159 --- /dev/null +++ b/test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-3.ttl @@ -0,0 +1,3 @@ +PREFIX : + +:s :p <<:s2 :p2 :o2>> {| :r :z |} . diff --git a/turtlestar-evaluationtest.py b/turtlestar-evaluationtest.py new file mode 100644 index 000000000..4672ca3bd --- /dev/null +++ b/turtlestar-evaluationtest.py @@ -0,0 +1,68 @@ +import pytest + +from pathlib import Path +from shutil import copyfile +from tempfile import TemporaryDirectory + +from rdflib.exceptions import ParserError + +from rdflib import Graph +from rdflib.util import guess_format + + +from rdflib.plugin import register +from rdflib.parser import Parser +from rdflib.serializer import Serializer + +import rdflib +from rdflib import URIRef +from rdflib.namespace import RDF +from rdflib.namespace import FOAF + +g = Graph() +g.parse(data="test/turtlestar-evaluation/turtle-star-eval-01.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtlestar-evaluation/turtle-star-eval-02.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtlestar-evaluation/turtle-star-eval-bnode-1.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtlestar-evaluation/turtle-star-eval-bnode-2.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtlestar-evaluation/turtle-star-eval-annotation-1.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtlestar-evaluation/turtle-star-eval-annotation-2.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtlestar-evaluation/turtle-star-eval-annotation-3.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() 
+g.parse("test/turtlestar-evaluation/turtle-star-eval-annotation-4.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtlestar-evaluation/turtle-star-eval-annotation-5.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-1.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-2.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) + +g = Graph() +g.parse("test/turtlestar-evaluation/turtle-star-eval-quoted-annotation-3.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) From 468b2bcc8a50e204fead444cbca46530cc04625d Mon Sep 17 00:00:00 2001 From: Xuguang Song Date: Sun, 18 Sep 2022 01:43:50 +1000 Subject: [PATCH 03/11] updating annotation fix to trigstar to fix nested tests for trig further evaluation --- rdflib/plugins/parsers/trigstar.py | 481 ++++++++++++++++++++------- rdflib/plugins/parsers/turtlestar.py | 2 +- 2 files changed, 357 insertions(+), 126 deletions(-) diff --git a/rdflib/plugins/parsers/trigstar.py b/rdflib/plugins/parsers/trigstar.py index 6e9c1016d..81541ab0b 100644 --- a/rdflib/plugins/parsers/trigstar.py +++ b/rdflib/plugins/parsers/trigstar.py @@ -282,13 +282,115 @@ def runNamespace(): # decode_literal, # ) +from typing import List, Dict, Union, Callable, Iterable, Optional + +from lark import Lark +from lark.tree import Tree, ParseTree +from lark.visitors import Transformer_InPlace +from lark.lexer import Token, PatternStr, TerminalDef +from lark.grammar import Terminal, NonTerminal, Symbol + +from lark.tree_matcher import TreeMatcher, is_discarded_terminal +from lark.utils import is_id_continue + +def is_iter_empty(i): + try: + _ = next(i) + return False + except StopIteration: + return True + + +class WriteTokensTransformer(Transformer_InPlace): + "Inserts discarded tokens into their 
correct place, according to the rules of grammar" + + tokens: Dict[str, TerminalDef] + term_subs: Dict[str, Callable[[Symbol], str]] + + def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None: + self.tokens = tokens + self.term_subs = term_subs + + def __default__(self, data, children, meta): + if not getattr(meta, 'match_tree', False): + return Tree(data, children) + + iter_args = iter(children) + to_write = [] + for sym in meta.orig_expansion: + if is_discarded_terminal(sym): + try: + v = self.term_subs[sym.name](sym) + except KeyError: + t = self.tokens[sym.name] + if not isinstance(t.pattern, PatternStr): + raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t) + + v = t.pattern.value + to_write.append(v) + else: + x = next(iter_args) + if isinstance(x, list): + to_write += x + else: + if isinstance(x, Token): + assert Terminal(x.type) == sym, x + else: + assert NonTerminal(x.data) == sym, (sym, x) + to_write.append(x) + + assert is_iter_empty(iter_args) + return to_write + + +class Reconstructorv2(TreeMatcher): + """ + A Reconstructor that will, given a full parse Tree, generate source code. + Note: + The reconstructor cannot generate values from regexps. If you need to produce discarded + regexes, such as newlines, use `term_subs` and provide default values for them. 
+ Paramters: + parser: a Lark instance + term_subs: a dictionary of [Terminal name as str] to [output text as str] + """ + + write_tokens: WriteTokensTransformer + + def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None: + TreeMatcher.__init__(self, parser) + + self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {}) + + def _reconstruct(self, tree): + unreduced_tree = self.match_tree(tree, tree.data) + + res = self.write_tokens.transform(unreduced_tree) + for item in res: + if isinstance(item, Tree): + # TODO use orig_expansion.rulename to support templates + yield from self._reconstruct(item) + else: + yield item + + def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str: + x = self._reconstruct(tree) + if postproc: + x = postproc(x) + y = [] + prev_item = '' + for item in x: + if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]): + y.append(' ') + y.append(item) + prev_item = item + return ' '.join(y) grammar = r"""trig_doc: (directive | block)* -?statement: directive | triples "." | quotedtriples "." +?statement: directive | triples "." block: triplesorgraph | wrappedgraph | triples2 | "GRAPH" labelorsubject wrappedgraph triplesorgraph: labelorsubject (wrappedgraph | predicate_object_list ".") | quotation predicate_object_list "." triples2: blank_node_property_list predicate_object_list? "." | collection predicate_object_list "." wrappedgraph: "{" triplesblock? "}" -triplesblock: triples ("." triplesblock?)? | quotedtriples ("." triplesblock?)? +triplesblock: triples ("." triplesblock?)? labelorsubject: iri | blank_node directive: prefix_id | base | sparql_prefix | sparql_base prefix_id: "@prefix" PNAME_NS IRIREF "." @@ -298,9 +400,8 @@ def runNamespace(): triples: subject predicate_object_list | blank_node_property_list predicate_object_list? 
insidequotation: qtsubject verb qtobject -quotedtriples: triples compoundanno predicate_object_list: verb object_list (";" (verb object_list)?)* -?object_list: object ("," object )* +?object_list: object compoundanno? ("," object compoundanno?)* ?verb: predicate | /a/ ?subject: iri | blank_node | collection | quotation ?predicate: iri @@ -363,6 +464,11 @@ def runNamespace(): trig_lark = Lark(grammar, start="trig_doc", parser="lalr", maybe_placeholders = False) +class Print_Tree(Visitor): + def print_quotation(self, tree): + assert tree.data == "quotation" + print(tree.children) + from lark import Visitor, v_args quotation_list = [] quotation_dict = dict() @@ -374,9 +480,78 @@ def runNamespace(): assertedtriplelist = [] quoted_or_not = False both_quoted_and_asserted = False +object_annotation_list = [] +annotation_s_p_o = [] +annotation_dict = dict() +to_remove = [] +output = "" def myHash(text:str): return str(hashlib.md5(text.encode('utf-8')).hexdigest()) +class Expandanotation(Visitor): + global annotation_s_p_o, to_remove + def __init__(self): + super().__init__() + self.variable_list = [] + + def triples(self, var): + tri = Reconstructorv2(trig_lark).reconstruct(var) + if "{|" in tri: + if len(var.children) == 2: + predicate_object_list2 = var.children[1].children + subject = Reconstructorv2(trig_lark).reconstruct(var.children[0]) + po_list = [] + for x in range(0, len(predicate_object_list2)): + + predicate_or_object = Reconstructorv2(trig_lark).reconstruct(predicate_object_list2[x]) + po_list.append(predicate_or_object) + if len(po_list) == 2: + if "," in po_list[1]: + po_lists = po_list[1].split(",") + + for y in po_lists: + try: + object_annotation = y.split("{|",1) + o1 = object_annotation[0] + a1 = "{|"+object_annotation[1] + a1 = a1.strip() + a1_Dict = annotation_dict[a1] + spo_list = [subject,po_list[0],o1, a1_Dict] + annotation_s_p_o.append(spo_list) + except: + spo_list = [subject,po_list[0],y] + annotation_s_p_o.append(spo_list) + else: + 
object_annotation = po_list[1].split("{|",1) + o1 = object_annotation[0] + a1 = "{|"+object_annotation[1] + a1_Dict = annotation_dict[a1] + spo_list = [subject, po_list[0], o1, a1_Dict] + annotation_s_p_o.append(spo_list) + po_list = [] + to_remove.append(tri) + def compoundanno(self, var): + tri2 = Reconstructorv2(trig_lark).reconstruct(var) + + + for x in var.children[1].children: + test = Reconstructorv2(trig_lark).reconstruct(x) + if "{|" in test: + test123 = test.split("{|",1) + object = test123[0] + test123.pop(0) + test_annotation = "{|"+ "".join(test123) + result = annotation_dict[test_annotation] + if not tri2 in annotation_dict: + annotation_dict[tri2] = [object,result] + else: + annotation_dict[tri2].append(object) + annotation_dict[tri2].append(result) + else: + if not tri2 in annotation_dict: + annotation_dict[tri2] = [test] + else: + annotation_dict[tri2].append(test) class FindVariables(Visitor): def __init__(self): @@ -392,10 +567,8 @@ def quotation(self, var): vr = Reconstructor(trig_lark).reconstruct(var) vr = vr.replace(";","") - quotation_dict[qut] = str(myHash(qut)) + "RdfstarTriple" qut_hash = ":" + str(myHash(qut)) - # try: id = quotation_dict.get(vr) for x in quotation_dict: if x in vr: @@ -441,35 +614,6 @@ def collection(self, var): hasht2 = "_:" + t2 var.children[x] = Tree('iri', [Tree('prefixed_name', [Token('PNAME_LN', hasht2)])]) - def quotedtriples(self, var): - triple1 = None - subjecthash = "" - - for x in var.children: - if x.data == "triples": - triple1 = Reconstructor(trig_lark).reconstruct(x) - triple1 = triple1.replace(";","") - triple1 = "<<"+triple1+">>" - subjecthash = "_:" + str(myHash(triple1)) + "RdfstarTriple" - if not (triple1 in quotation_list): - quotation_list.append(triple1) - - quotation_dict[triple1] = str(myHash(triple1)) + "RdfstarTriple" - elif x.data == "compoundanno": - for y in x.children: - if (y != "{|") & (y!= "|}"): - count2 = 0 - quotationtriple = [] - for z in y.children: - count2+=1 - z2 = 
Reconstructor(trig_lark).reconstruct(z) - quotationtriple.append(z2) - if count2 ==2: - quotationtriple.insert(0, subjecthash) - quotationannolist.append(quotationtriple) - count2 = 0 - quotationtriple = [] - def triples(self, var): appends1 = [] @@ -492,7 +636,6 @@ def triples(self, var): appends1.append(Reconstructor(trig_lark).reconstruct(var)) appends1.append(" . \n") break - x2 = x2.replace(";","") appends1.append(x2) else: anyquotationin = False @@ -512,27 +655,21 @@ def insidequotation(self, var): if not (appends1 in vblist): vblist.append(appends1) - # def prefixed_name(self, children): - # print("prefixed_name") - # print("pn", self) - def prefix_id(self, children): - print("prefix_id") + pass def sparql_prefix(self, children): - print("sparql_prefix", children) prefix_list.append(children) def base(self, children): - print("base") base_directive, base_iriref = children - print("base", base_directive, base_iriref) + # print("base", base_directive, base_iriref) # Workaround for lalr parser token ambiguity in python 2.7 if base_directive.startswith('@') and base_directive != '@base': raise ValueError('Unexpected @base: ' + base_directive) def RDFstarParsings(rdfstarstring): - global quotationannolist, quotation_dict, vblist, quotationreif, prefix_list, constructors, assertedtriplelist, quoted_or_not, both_quoted_and_asserted + global quotationannolist, vblist, quotation_dict, quotationreif, prefix_list, constructors, assertedtriplelist, quoted_or_not, both_quoted_and_asserted, to_remove, annotation_s_p_o, output, annotation_dict quotationannolist = [] vblist = [] quotationreif = [] @@ -540,95 +677,160 @@ def RDFstarParsings(rdfstarstring): constructors = "" quoted_or_not = False both_quoted_and_asserted = False + output = "" + output_tree = "" + annotation_s_p_o = [] + to_remove = [] + annotation_dict = dict() tree = trig_lark.parse(rdfstarstring) + + tt = Expandanotation().visit(tree) + + tree_after = Reconstructorv2(trig_lark).reconstruct(tree) + + 
splittree_after = tree_after.split(">") + + PREFIX_substitute = dict() + for x in splittree_after: + + if "PREFIX" in x: + y = x + ">"+" " + "\n" + PREFIX_substitute[x+">"] = y + for z in PREFIX_substitute: + tree_after = tree_after.replace(z, "") + for z in PREFIX_substitute: + tree_after = PREFIX_substitute[z] + tree_after + + + for x in to_remove: + + x = x + " ." + + tree_after = tree_after.replace(x, "") + tree_after = tree_after+ "\n" # + if "PREFIX:" in tree_after: + tree_after = tree_after.replace("PREFIX:", "PREFIX :") + + def expand_to_rdfstar(x): + global output + + spo = "<<"+x[0] +" "+x[1] + " " + x[2]+">>" + try: + if len(x[3]) == 2: + + output += spo + " "+ x[3][0] +" "+x[3][1] + "." + "\n" # smart + + elif len(x[3]) == 3: + + output += spo + " "+ x[3][0] +" "+x[3][1] + "." + "\n" # smart + + newspolist = [spo, x[3][0],x[3][1], x[3][2]] + + expand_to_rdfstar(newspolist) + else: + + clist = [x[3][y:y+2] for y in range(0, len(x[3]),2)] + + for z in clist: + + expand_to_rdfstar([x[0],x[1],x[2],z]) + except: + pass + output = "" + for x in annotation_s_p_o: + + output +=x[0] +" "+ x[1] +" "+ x[2] + "." 
+ "\n" + expand_to_rdfstar(x) + output_tree = tree_after+output + if ":G { }\n" in output_tree: + output_tree = output_tree.replace(":G { }\n", ":G {") + output_tree = output_tree+ "}" + + + tree = trig_lark.parse(output_tree) + at = FindVariables().visit(tree) for y in vblist: + for element_index in range(0, len(y)): if (y[element_index][0] == "_") & (not (element_index == 0)): y[element_index]=" "+y[element_index] result = "".join(y) - result = result.replace(" ", "") - if result in assertedtriplelist: - test1 = "<<"+result+">>" - if test1 in quotation_list: - both_quoted_and_asserted = True - else: - both_quoted_and_asserted = False - quoted_or_not = False + + if "standard reification" in result: + + + result = result.replace("standard reification", "") + constructors+=result else: - test2 = "<<"+result+">>" - if test2 in quotation_list: - both_quoted_and_asserted = False - quoted_or_not = True - else: - both_quoted_and_asserted = False - quoted_or_not = False - result = "<<"+result+">>" - if not (result in quotation_list): - for z in range(0,len(y)): - if "<<" in y[z]: - y[z] = y[z].replace(" ", "") - y[z] = "_:"+quotation_dict[y[z]] - myvalue = str(myHash(result)) - try: - subject = y[0] - predicate = y[1] - object = y[2] - except: - if len(y)==1: - result2 = y[0] - constructors+=result2 - constructors = constructors +".\n" - continue - if both_quoted_and_asserted: - next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + + result = result.replace(" ", "") + + if result in assertedtriplelist: + test1 = "<<"+result+">>" + if test1 in quotation_list: + both_quoted_and_asserted = True + else: + both_quoted_and_asserted = False + quoted_or_not = False else: - if quoted_or_not: - next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject 
"+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + + test2 = "<<"+result+">>" + if test2 in quotation_list: + both_quoted_and_asserted = False + quoted_or_not = True else: - next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + both_quoted_and_asserted = False + quoted_or_not = False + result = "<<"+result+">>" - constructors+=next_rdf_object - else: - value = quotation_dict[result] - for z in range(0,len(y)): - if "<<" in y[z]: - y[z] = "_:"+quotation_dict[y[z]] - subject = y[0] - predicate = y[1] - object = y[2] - if both_quoted_and_asserted: - next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" - else: - if quoted_or_not: - next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + if not (result in quotation_list): + for z in range(0,len(y)): + if "<<" in y[z]: + y[z] = y[z].replace(" ", "") + + y[z] = "_:"+quotation_dict[y[z]] + myvalue = str(myHash(result)) + + try: + subject = y[0] + predicate = y[1] + object = y[2] + except: + if len(y)==1: + result2 = y[0] + + constructors+=result2 + constructors = constructors +".\n" + continue + if both_quoted_and_asserted: + next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" else: - next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" - constructors+=next_rdf_object - - 
for z in quotationannolist: - result1 = "".join(z) - result1 = "<<"+result1+">>" - if result1 in quotation_list: - both_quoted_and_asserted = True - else: - both_quoted_and_asserted = False - quoted_or_not = False - value = str(myHash(result1)) - subject = z[0] - predicate = z[1] - object = z[2] - if both_quoted_and_asserted: - next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" - else: - if quoted_or_not: - next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" - else: - next_rdf_object = "_:" + str(value) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + if quoted_or_not: + next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + next_rdf_object = "_:" + str(myvalue) +"RdfstarTriple"+ '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" - constructors+=next_rdf_object + constructors+=next_rdf_object + else: + value = quotation_dict[result] + for z in range(0,len(y)): + if "<<" in y[z]: + y[z] = "_:"+quotation_dict[y[z]] + subject = y[0] + predicate = y[1] + object = y[2] + if both_quoted_and_asserted: + next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement, rdfstar:QuotedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + if quoted_or_not: + next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:QuotedStatement ;\n"+" 
rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + else: + next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" + + constructors+=next_rdf_object for x in range(0, len(prefix_list)): prefix_list[x] = Reconstructor(trig_lark).reconstruct(prefix_list[x]) constructors = prefix_list[x]+"\n"+constructors @@ -637,8 +839,6 @@ def RDFstarParsings(rdfstarstring): constructors = "PREFIX rdf: \n"+constructors - - if not (("PREFIX : " in constructors) or ("PREFIX:" in constructors)): constructors = "PREFIX : \n"+constructors @@ -867,7 +1067,8 @@ def feed(self, octets: Union[str, bytes]): return i = self.directiveOrStatement(s, j) if i < 0: - # print("# next char: %s" % s) + # print("# next char: %s" % s[j-5:j+5]) + # print("asdadasd", i, j) self.BadSyntax(s, j, "expected directive or statement") def directiveOrStatement(self, argstr: str, h: int) -> int: @@ -1262,7 +1463,10 @@ def path(self, argstr: str, i: int, res): def anonymousNode(self, ln: str): """Remember or generate a term for one of these _: anonymous nodes""" + # print("anonymousNode", self._anonymousNodes.get(ln, None), self._context, self._reason2) if ("RdfstarTriple" in ln): + # print("new object") + # ln = ln.replace("RdfstarTriple", "") term = self._rdfstartripleNodes.get(ln, None) if term is not None: return term @@ -1330,6 +1534,7 @@ def node(self, argstr: str, i: int, res, subjectAlready=None): argstr, i, "EOF when ']' expected after [ " ) if argstr[j] != "]": + # print("asdadasd", argstr[j-1], argstr[j-2], argstr[j-3], argstr[j-4], argstr[j-5]) self.BadSyntax(argstr, j, "']' expected") res.append(subj) return j + 1 @@ -1508,6 +1713,7 @@ def property_list(self, argstr: str, i: int, subj): self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") i = j + 2 res: typing.List[Any] = [] + # print("node in propertylist", self.node(argstr, 
i, res, subj)) j = self.node(argstr, i, res, subj) if j < 0: self.BadSyntax(argstr, i, "bad {} or () or [] node after :- ") @@ -1522,13 +1728,14 @@ def property_list(self, argstr: str, i: int, subj): objs: typing.List[Any] = [] i = self.objectList(argstr, j, objs) + # print("objectList in propertylist", objs) if i < 0: self.BadSyntax(argstr, j, "objectList expected") for obj in objs: dira, sym = v[0] if "RdfstarTriple" in subj: - + # print("asdasdasd", obj) if "rdf-star" in str(obj): if len(quoted_triple_list) > 2: quoted_triple_list = [] @@ -1536,11 +1743,11 @@ def property_list(self, argstr: str, i: int, subj): if (rdflib.term.URIRef('https://w3id.org/rdf-star/QuotedStatement') in quoted_triple_list) & (not (subj in quoted_triple_list)): quoted_triple_list.append(subj) if "#object" in sym: - + # print("asdasdasd", quoted_triple_list) self.addingquotedRdfstarTriple(quoted_triple_list, dira) else: if dira == "->": - + # print("tests ->", self._context, sym, subj, obj) self.makeStatement((self._context, sym, subj, obj)) else: self.makeStatement((self._context, sym, obj, subj)) @@ -1579,6 +1786,7 @@ def commaSeparatedList(self, argstr: str, j, res, what): self.BadSyntax(argstr, i, "bad list content") def objectList(self, argstr: str, i: int, res: typing.List[Any]) -> int: + # print("object in objectList") i = self.object(argstr, i, res) if i < 0: return -1 @@ -1622,7 +1830,9 @@ def uri_ref2(self, argstr: str, i: int, res): try: ns = self._bindings[pfx] except KeyError: + # print("testuri2", pfx, ln) if pfx == "_": # Magic prefix 2001/05/30, can be changed + # print("anonymousNode in uriref2") res.append(self.anonymousNode(ln)) return j if not self.turtle and pfx == "": @@ -1864,6 +2074,7 @@ def qname(self, argstr: str, i: int, res): return -1 def object(self, argstr: str, i: int, res): + # print("subject in object") j = self.subject(argstr, i, res) if j >= 0: return j @@ -1892,6 +2103,7 @@ def object(self, argstr: str, i: int, res): return -1 def nodeOrLiteral(self, 
argstr: str, i: int, res): + # print("node in nodeOrLiteral") j = self.node(argstr, i, res) startline = self.lines # Remember where for error messages if j >= 0: @@ -1952,6 +2164,7 @@ def nodeOrLiteral(self, argstr: str, i: int, res): j = i if argstr[j : j + 2] == "^^": res2: typing.List[Any] = [] + # print("nodeorLiteral") j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI dt = res2[0] res.append(self._store.newLiteral(s, dt, lang)) @@ -2164,6 +2377,7 @@ def id(self): return BNode("_:Formula%s" % self.number) def newBlankNode(self, uri=None, why=None): + # print("newBlankNode in Formula") if uri is None: self.counter += 1 bn = BNode("f%sb%s" % (self.uuid, self.counter)) @@ -2172,6 +2386,7 @@ def newBlankNode(self, uri=None, why=None): return bn def newRdfstarTriple(self, hashvalue, uri=None, why=None): + # print("newRdfstarTriple in Formula") if uri is None: # self.counter += 1 rdfstartriple = RdfstarTriple(hashvalue = hashvalue) @@ -2221,12 +2436,16 @@ def newBlankNode( uri: Optional[str] = None, why: Optional[Callable[[], None]] = None, ) -> BNode: + # print("newBlankNode in RDFSink") if isinstance(arg, Formula): + # print("newBlankNode in Formula", arg, uri) return arg.newBlankNode(uri) elif isinstance(arg, Graph) or arg is None: + # print("newBlankNode in RDFSink Graph", arg, uri, self.uuid, self.counter,"n%sb%s" % (self.uuid, self.counter)) self.counter += 1 bn = BNode("n%sb%s" % (self.uuid, self.counter)) else: + # print("testsv24", arg, uri, str(arg[0]).split("#").pop().replace("_", "b")) bn = BNode(str(arg[0]).split("#").pop().replace("_", "b")) return bn @@ -2240,12 +2459,17 @@ def newRdfstarTriple( why: Optional[Callable[[], None]] = None, hashvalue: Optional[str] = None ) -> RdfstarTriple: + # print("newRdflibRdfstartriple in Formula") if isinstance(arg, Formula): + # print("testsv2", arg, uri) return arg.newRdfstarTriple(hashvalue = hashvalue) elif isinstance(arg, Graph) or arg is None: + # print("newRdflibRdfstartriple", hashvalue) # 
self.counter += 1 rdfstartriple = RdfstarTriple(hashvalue =hashvalue) else: + # print("newRdflibRdfstartriple",hashvalue) + # print("testsv24", arg, uri, str(arg[0]).split("#").pop().replace("_", "rdfstartriple")) rdfstartriple = RdfstarTriple(hashvalue =hashvalue) return rdfstartriple @@ -2256,6 +2480,7 @@ def newLiteral(self, s: str, dt: Optional[URIRef], lang: Optional[str]) -> Liter return Literal(s, lang=lang) def newList(self, n: typing.List[Any], f: Optional[Formula]): + # print("testnewlist") nil = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") if not n: return nil @@ -2280,6 +2505,7 @@ def setDefaultNamespace(self, *args) -> str: return ":".join(repr(n) for n in args) def makeStatement(self, quadruple, why=None) -> None: + # print("testmakeStatement", quadruple) f, p, s, o = quadruple if hasattr(p, "formula"): @@ -2292,6 +2518,7 @@ def makeStatement(self, quadruple, why=None) -> None: # print s, p, o, '.' self.graph.add((s, p, o)) elif isinstance(f, Formula): + # print("quotedgraph added") f.quotedgraph.add((s, p, o)) else: f.add((s, p, o)) @@ -2299,6 +2526,7 @@ def makeStatement(self, quadruple, why=None) -> None: # return str(quadruple) def makerdfstarStatement(self, quadruple, why=None) -> None: + # print("testmakeStatement", quadruple) f, hashnode, p, s, o = quadruple if hasattr(p, "formula"): @@ -2307,6 +2535,7 @@ def makerdfstarStatement(self, quadruple, why=None) -> None: s = self.normalise(f, s) p = self.normalise(f, p) o = self.normalise(f, o) + # print("testmakerdfstarStatement", hashnode, s,p,o) if f == self.rootFormula: # print s, p, o, '.' 
self.graph.addStarTriple((hashnode, s, p, o)) @@ -2420,6 +2649,7 @@ def parse( # stream = source.getByteStream() # p.loadStream(stream) + # print("tests", source) if hasattr(source, "file"): f = open(source.file.name, "rb") rdbytes = f.read() @@ -2432,6 +2662,7 @@ def parse( bp = rdbytes.decode("utf-8") ou = RDFstarParsings(bp) + # print(ou) p.feed(ou) p.endDoc() for prefix, namespace in p._bindings.items(): diff --git a/rdflib/plugins/parsers/turtlestar.py b/rdflib/plugins/parsers/turtlestar.py index f47daa5d2..78eb7709e 100644 --- a/rdflib/plugins/parsers/turtlestar.py +++ b/rdflib/plugins/parsers/turtlestar.py @@ -686,7 +686,7 @@ def base(self, children): raise ValueError('Unexpected @base: ' + base_directive) def RDFstarParsings(rdfstarstring): - global quotationannolist, vblist, quotation_dict, quotationreif, prefix_list, constructors, assertedtriplelist, quoted_or_not, both_quoted_and_asserted, to_remove, annotation_s_p_o, output + global quotationannolist, vblist, quotation_dict, quotationreif, prefix_list, constructors, assertedtriplelist, quoted_or_not, both_quoted_and_asserted, to_remove, annotation_s_p_o, output, annotation_dict quotationannolist = [] vblist = [] quotationreif = [] From 775c3d5b69c32bf859f5d0b933d8cdb5d1104438 Mon Sep 17 00:00:00 2001 From: Xuguang Song Date: Sun, 18 Sep 2022 23:01:58 +1000 Subject: [PATCH 04/11] trigstar fix --- rdflib/plugins/parsers/trigstar.py | 157 +++++++++++++++++++++++-- rdflib/plugins/serializers/trigstar.py | 24 ++-- test_serializer_trigstar.py | 26 ++-- trigstar-evaluationtest.py | 68 +++++++++++ 4 files changed, 242 insertions(+), 33 deletions(-) create mode 100644 trigstar-evaluationtest.py diff --git a/rdflib/plugins/parsers/trigstar.py b/rdflib/plugins/parsers/trigstar.py index 81541ab0b..0c7d9bdaa 100644 --- a/rdflib/plugins/parsers/trigstar.py +++ b/rdflib/plugins/parsers/trigstar.py @@ -484,6 +484,7 @@ def print_quotation(self, tree): annotation_s_p_o = [] annotation_dict = dict() to_remove = [] 
+trig_graph = [] output = "" def myHash(text:str): @@ -558,6 +559,14 @@ def __init__(self): super().__init__() self.variable_list = [] + def labelorsubject(self, var): + + try: + vr = Reconstructor(trig_lark).reconstruct(var) + trig_graph.append(vr) + except: + pass + def quotation(self, var): qut = Reconstructor(trig_lark).reconstruct(var) qut = qut.replace(";", "") @@ -669,7 +678,7 @@ def base(self, children): raise ValueError('Unexpected @base: ' + base_directive) def RDFstarParsings(rdfstarstring): - global quotationannolist, vblist, quotation_dict, quotationreif, prefix_list, constructors, assertedtriplelist, quoted_or_not, both_quoted_and_asserted, to_remove, annotation_s_p_o, output, annotation_dict + global quotationannolist, vblist, quotation_dict, quotationreif, prefix_list, constructors, assertedtriplelist, quoted_or_not, both_quoted_and_asserted, to_remove, annotation_s_p_o, output, annotation_dict, trig_graph quotationannolist = [] vblist = [] quotationreif = [] @@ -682,6 +691,8 @@ def RDFstarParsings(rdfstarstring): annotation_s_p_o = [] to_remove = [] annotation_dict = dict() + trig_graph = [] + tree = trig_lark.parse(rdfstarstring) tt = Expandanotation().visit(tree) @@ -831,6 +842,8 @@ def expand_to_rdfstar(x): next_rdf_object = "_:" + str(value) + '\n' + " a rdfstar:AssertedStatement ;\n"+" rdf:subject "+subject+' ;\n'+" rdf:predicate "+predicate+" ;\n"+" rdf:object "+object+" ;\n"+".\n" constructors+=next_rdf_object + if len(trig_graph)!=0: + constructors=trig_graph[0]+"{\n"+constructors+"\n}" for x in range(0, len(prefix_list)): prefix_list[x] = Reconstructor(trig_lark).reconstruct(prefix_list[x]) constructors = prefix_list[x]+"\n"+constructors @@ -2616,6 +2629,117 @@ def hexify(ustr): s = s + ch return s.encode("latin-1") +class TrigSinkParser(SinkParser): + def directiveOrStatement(self, argstr, h): + + # import pdb; pdb.set_trace() + + i = self.skipSpace(argstr, h) + if i < 0: + return i # EOF + + j = self.graph(argstr, i) + if j >= 0: + 
return j + + j = self.sparqlDirective(argstr, i) + if j >= 0: + return j + + j = self.directive(argstr, i) + if j >= 0: + return self.checkDot(argstr, j) + + j = self.statement(argstr, i) + if j >= 0: + return self.checkDot(argstr, j) + + return j + + def labelOrSubject(self, argstr, i, res): + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + i = j + + j = self.uri_ref2(argstr, i, res) + if j >= 0: + return j + + if argstr[i] == "[": + j = self.skipSpace(argstr, i + 1) + if j < 0: + self.BadSyntax(argstr, i, "Expected ] got EOF") + if argstr[j] == "]": + res.append(self.blankNode()) + return j + 1 + return -1 + + def graph(self, argstr, i): + """ + Parse trig graph, i.e. + + = { .. triples .. } + + return -1 if it doesn't look like a graph-decl + raise Exception if it looks like a graph, but isn't. + """ + + # import pdb; pdb.set_trace() + j = self.sparqlTok("GRAPH", argstr, i) # optional GRAPH keyword + if j >= 0: + i = j + + r = [] + j = self.labelOrSubject(argstr, i, r) + if j >= 0: + graph = r[0] + i = j + else: + graph = self._store.graph.identifier # hack + + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, i, "EOF found when expected graph") + + if argstr[j : j + 1] == "=": # optional = for legacy support + + i = self.skipSpace(argstr, j + 1) + if i < 0: + self.BadSyntax(argstr, i, "EOF found when expecting '{'") + else: + i = j + + if argstr[i : i + 1] != "{": + return -1 # the node wasn't part of a graph + + j = i + 1 + + oldParentContext = self._parentContext + self._parentContext = self._context + reason2 = self._reason2 + self._reason2 = becauseSubGraph + self._context = self._store.newGraph(graph) + print(self._context) + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, "needed '}', found end.") + + if argstr[i : i + 1] == "}": + j = i + 1 + break + + j = self.directiveOrStatement(argstr, i) + if j < 0: + self.BadSyntax(argstr, i, "expected statement or '}'") + + self._context = 
self._parentContext + self._reason2 = reason2 + self._parentContext = oldParentContext + # res.append(subj.close()) # No use until closed + return j + class TrigParser(Parser): @@ -2635,14 +2759,32 @@ def parse( turtle: bool = True, ): if encoding not in [None, "utf-8"]: - raise ParserError( - "N3/Turtle files are always utf-8 encoded, I was passed: %s" % encoding + raise Exception( + ("TriG files are always utf-8 encoded, ", "I was passed: %s") % encoding ) - sink = RDFSink(graph) + # we're currently being handed a Graph, not a ConjunctiveGraph + print("Contextawareasdasdasdasd\n\n\n\n", graph.store.context_aware) + assert graph.store.context_aware, "TriG Parser needs a context-aware store!" + + conj_graph = ConjunctiveGraph(store=graph.store, identifier=graph.identifier) + conj_graph.default_context = graph # TODO: CG __init__ should have a + # default_context arg + # TODO: update N3Processor so that it can use conj_graph as the sink + conj_graph.namespace_manager = graph.namespace_manager + + sink = RDFSink(conj_graph) - baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "") - p = SinkParser(sink, baseURI=baseURI, turtle=turtle) + baseURI = conj_graph.absolutize( + source.getPublicId() or source.getSystemId() or "" + ) + p = TrigSinkParser(sink, baseURI=baseURI, turtle=True) + + # return ??? 
+ # sink = RDFSink(graph) + + # baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "") + # p = SinkParser(sink, baseURI=baseURI, turtle=turtle) # N3 parser prefers str stream # stream = source.getCharacterStream() # if not stream: @@ -2650,6 +2792,7 @@ def parse( # p.loadStream(stream) # print("tests", source) + if hasattr(source, "file"): f = open(source.file.name, "rb") rdbytes = f.read() @@ -2666,4 +2809,4 @@ def parse( p.feed(ou) p.endDoc() for prefix, namespace in p._bindings.items(): - graph.bind(prefix, namespace) + conj_graph.bind(prefix, namespace) diff --git a/rdflib/plugins/serializers/trigstar.py b/rdflib/plugins/serializers/trigstar.py index 33b59a049..36451c369 100644 --- a/rdflib/plugins/serializers/trigstar.py +++ b/rdflib/plugins/serializers/trigstar.py @@ -3,7 +3,7 @@ See for details about the format. """ # from this import d -from typing import IO, Optional, Type, Union +from typing import IO, Optional, Type, Union, TYPE_CHECKING import json from rdflib.graph import Graph, ConjunctiveGraph from rdflib.term import Literal, URIRef, Node, BNode, RdfstarTriple @@ -27,18 +27,18 @@ class TrigstarSerializer(Serializer): def __init__(self, store: Union[Graph, ConjunctiveGraph]): self.default_context: Optional[Node] - self.graph_type: Type[Graph] - if isinstance(store, ConjunctiveGraph): - self.graph_type = ConjunctiveGraph + # print("init", list(store.contexts())) + if store.context_aware: + if TYPE_CHECKING: + assert isinstance(store, ConjunctiveGraph) self.contexts = list(store.contexts()) + # print("sadasd", [store]) + self.default_context = store.default_context.identifier if store.default_context: - self.default_context = store.default_context self.contexts.append(store.default_context) - else: - self.default_context = None else: - self.graph_type = Graph self.contexts = [store] + # print("asdasdas", store.default_context.identifier) self.default_context = None Serializer.__init__(self, store) @@ -527,11 +527,9 @@ def 
expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o if(isinstance(predicate, rdflib.term.URIRef)): predicate = "<"+str(predicate)+">" - output = output+subject+" "+predicate+" "+object+" ."+"\n" - - if output is not None: - output = "_:"+str(myHash(output))+ "{\n"+ output + "}" - stream.write(output.encode()) + output = subject+" "+predicate+" "+object+" <"+str(g.identifier)+"> "" ."+"\n" + if output is not None: + stream.write(output.encode()) def _iri_or_bn(self, i_): if isinstance(i_, URIRef): diff --git a/test_serializer_trigstar.py b/test_serializer_trigstar.py index d265bcc33..8e208a020 100644 --- a/test_serializer_trigstar.py +++ b/test_serializer_trigstar.py @@ -6,7 +6,7 @@ from rdflib.exceptions import ParserError -from rdflib import Graph +from rdflib import Graph, ConjunctiveGraph from rdflib.util import guess_format @@ -19,50 +19,50 @@ from rdflib.namespace import RDF from rdflib.namespace import FOAF -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-syntax-basic-01.trig", format = "trigs") print(g.serialize(format = "trigstar")) -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-syntax-basic-02.trig", format = "trigs") print(g.serialize(format = "trigstar")) -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-syntax-bnode-01.trig", format = "trigs") print(g.serialize(format = "trigstar")) -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-syntax-bnode-02.trig", format = "trigs") print(g.serialize(format = "trigstar")) -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-syntax-bnode-03.trig", format = "trigs") print(g.serialize(format = "trigstar")) -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-syntax-compound.trig", format = "trigs") print(g.serialize(format = "trigstar")) -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-syntax-inside-01.trig", 
format = "trigs") print(g.serialize(format = "trigstar")) -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-syntax-inside-02.trig", format = "trigs") print(g.serialize(format = "trigstar")) -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-syntax-nested-01.trig", format = "trigs") print(g.serialize(format = "trigstar")) -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-syntax-nested-02.trig", format = "trigs") print(g.serialize(format = "trigstar")) -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-annotation-1.trig", format = "trigs") print(g.serialize(format = "trigstar")) -g = Graph() +g = ConjunctiveGraph() g.parse(data="test/trig-star/trig-star-annotation-2.trig", format = "trigs") print(g.serialize(format = "trigstar")) diff --git a/trigstar-evaluationtest.py b/trigstar-evaluationtest.py new file mode 100644 index 000000000..fe8faebb8 --- /dev/null +++ b/trigstar-evaluationtest.py @@ -0,0 +1,68 @@ +import pytest + +from pathlib import Path +from shutil import copyfile +from tempfile import TemporaryDirectory + +from rdflib.exceptions import ParserError + +from rdflib import Graph, ConjunctiveGraph +from rdflib.util import guess_format + + +from rdflib.plugin import register +from rdflib.parser import Parser +from rdflib.serializer import Serializer + +import rdflib +from rdflib import URIRef +from rdflib.namespace import RDF +from rdflib.namespace import FOAF + +g = ConjunctiveGraph() +g.parse(data="test/trigstar-evaluation/trig-star-eval-01.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = ConjunctiveGraph() +g.parse("test/trigstar-evaluation/trig-star-eval-02.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = ConjunctiveGraph() +g.parse("test/trigstar-evaluation/trig-star-eval-bnode-1.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = ConjunctiveGraph() 
+g.parse("test/trigstar-evaluation/trig-star-eval-bnode-2.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = ConjunctiveGraph() +g.parse("test/trigstar-evaluation/trig-star-eval-annotation-1.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = ConjunctiveGraph() +g.parse("test/trigstar-evaluation/trig-star-eval-annotation-2.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = ConjunctiveGraph() +g.parse("test/trigstar-evaluation/trig-star-eval-annotation-3.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = ConjunctiveGraph() +g.parse("test/trigstar-evaluation/trig-star-eval-annotation-4.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = ConjunctiveGraph() +g.parse("test/trigstar-evaluation/trig-star-eval-annotation-5.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = ConjunctiveGraph() +g.parse("test/trigstar-evaluation/trig-star-eval-quoted-annotation-1.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = ConjunctiveGraph() +g.parse("test/trigstar-evaluation/trig-star-eval-quoted-annotation-2.trig", format = "trigs") +print(g.serialize(format = "trigstar")) + +g = ConjunctiveGraph() +g.parse("test/trigstar-evaluation/trig-star-eval-quoted-annotation-3.trig", format = "trigs") +print(g.serialize(format = "trigstar")) From 13e9084b30e054cb6658b19a036d279b39548e23 Mon Sep 17 00:00:00 2001 From: Xuguang Song Date: Sun, 18 Sep 2022 23:05:36 +1000 Subject: [PATCH 05/11] adding trigstar evaluation test --- test/trigstar-evaluation/trig-star-eval-01.trig | 3 +++ test/trigstar-evaluation/trig-star-eval-02.trig | 3 +++ .../trig-star-eval-annotation-1.trig | 3 +++ .../trig-star-eval-annotation-2.trig | 12 ++++++++++++ .../trig-star-eval-annotation-3.trig | 7 +++++++ .../trig-star-eval-annotation-4.trig | 3 +++ .../trig-star-eval-annotation-5.trig | 4 ++++ test/trigstar-evaluation/trig-star-eval-bnode-1.trig | 6 ++++++ 
test/trigstar-evaluation/trig-star-eval-bnode-2.trig | 6 ++++++ .../trig-star-eval-quoted-annotation-1.trig | 3 +++ .../trig-star-eval-quoted-annotation-2.trig | 3 +++ .../trig-star-eval-quoted-annotation-3.trig | 3 +++ 12 files changed, 56 insertions(+) create mode 100644 test/trigstar-evaluation/trig-star-eval-01.trig create mode 100644 test/trigstar-evaluation/trig-star-eval-02.trig create mode 100644 test/trigstar-evaluation/trig-star-eval-annotation-1.trig create mode 100644 test/trigstar-evaluation/trig-star-eval-annotation-2.trig create mode 100644 test/trigstar-evaluation/trig-star-eval-annotation-3.trig create mode 100644 test/trigstar-evaluation/trig-star-eval-annotation-4.trig create mode 100644 test/trigstar-evaluation/trig-star-eval-annotation-5.trig create mode 100644 test/trigstar-evaluation/trig-star-eval-bnode-1.trig create mode 100644 test/trigstar-evaluation/trig-star-eval-bnode-2.trig create mode 100644 test/trigstar-evaluation/trig-star-eval-quoted-annotation-1.trig create mode 100644 test/trigstar-evaluation/trig-star-eval-quoted-annotation-2.trig create mode 100644 test/trigstar-evaluation/trig-star-eval-quoted-annotation-3.trig diff --git a/test/trigstar-evaluation/trig-star-eval-01.trig b/test/trigstar-evaluation/trig-star-eval-01.trig new file mode 100644 index 000000000..924f2c826 --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-01.trig @@ -0,0 +1,3 @@ +PREFIX : + +:G {<<:s :p :o>> :q :z .} diff --git a/test/trigstar-evaluation/trig-star-eval-02.trig b/test/trigstar-evaluation/trig-star-eval-02.trig new file mode 100644 index 000000000..eb460dd2f --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-02.trig @@ -0,0 +1,3 @@ +PREFIX : + +:G {:a :q <<:s :p :o>> .} diff --git a/test/trigstar-evaluation/trig-star-eval-annotation-1.trig b/test/trigstar-evaluation/trig-star-eval-annotation-1.trig new file mode 100644 index 000000000..9364a9cf4 --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-annotation-1.trig @@ -0,0 
+1,3 @@ +PREFIX : + +:G {:s :p :o {| :r :z |} .} diff --git a/test/trigstar-evaluation/trig-star-eval-annotation-2.trig b/test/trigstar-evaluation/trig-star-eval-annotation-2.trig new file mode 100644 index 000000000..0dc1e05fa --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-annotation-2.trig @@ -0,0 +1,12 @@ +PREFIX : +PREFIX xsd: + +:G { + :s :p :o {| :source [ :graph ; + :date "2020-01-20"^^xsd:date + ] ; + :source [ :graph ; + :date "2020-12-31"^^xsd:date + ] + |} . +} diff --git a/test/trigstar-evaluation/trig-star-eval-annotation-3.trig b/test/trigstar-evaluation/trig-star-eval-annotation-3.trig new file mode 100644 index 000000000..336c745eb --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-annotation-3.trig @@ -0,0 +1,7 @@ +PREFIX : + +:G { + :s :p :o {| :a :b |}; + :p2 :o2 {| :a2 :b2 |}, + :o3 {| :a3 :b3 |}. +} diff --git a/test/trigstar-evaluation/trig-star-eval-annotation-4.trig b/test/trigstar-evaluation/trig-star-eval-annotation-4.trig new file mode 100644 index 000000000..d6d83da31 --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-annotation-4.trig @@ -0,0 +1,3 @@ +PREFIX : + +:G {:s :p :o {| :a :b {| :a2 :b2 |} |}.} diff --git a/test/trigstar-evaluation/trig-star-eval-annotation-5.trig b/test/trigstar-evaluation/trig-star-eval-annotation-5.trig new file mode 100644 index 000000000..59d830407 --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-annotation-5.trig @@ -0,0 +1,4 @@ +PREFIX : + +:G {:s :p :o1, :o2 {| :a :b |} .} + diff --git a/test/trigstar-evaluation/trig-star-eval-bnode-1.trig b/test/trigstar-evaluation/trig-star-eval-bnode-1.trig new file mode 100644 index 000000000..1ddce6d74 --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-bnode-1.trig @@ -0,0 +1,6 @@ +PREFIX : + +:G { + _:b :p :o . + <<_:b :p :o>> :q :z . 
+} diff --git a/test/trigstar-evaluation/trig-star-eval-bnode-2.trig b/test/trigstar-evaluation/trig-star-eval-bnode-2.trig new file mode 100644 index 000000000..3d2cb2a09 --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-bnode-2.trig @@ -0,0 +1,6 @@ +PREFIX : + +:G { + _:a :p1 _:a . + <<_:a :p1 _:a >> :q <<_:a :p2 :o>> . +} diff --git a/test/trigstar-evaluation/trig-star-eval-quoted-annotation-1.trig b/test/trigstar-evaluation/trig-star-eval-quoted-annotation-1.trig new file mode 100644 index 000000000..a56b84099 --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-quoted-annotation-1.trig @@ -0,0 +1,3 @@ +PREFIX : + +:G {:s :p :o {| :r <<:s1 :p1 :o1>> |} .} diff --git a/test/trigstar-evaluation/trig-star-eval-quoted-annotation-2.trig b/test/trigstar-evaluation/trig-star-eval-quoted-annotation-2.trig new file mode 100644 index 000000000..ed4dec930 --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-quoted-annotation-2.trig @@ -0,0 +1,3 @@ +PREFIX : + +:G {<<:s1 :p1 :o1>> :p :o {| :r :z |} .} diff --git a/test/trigstar-evaluation/trig-star-eval-quoted-annotation-3.trig b/test/trigstar-evaluation/trig-star-eval-quoted-annotation-3.trig new file mode 100644 index 000000000..5b1006733 --- /dev/null +++ b/test/trigstar-evaluation/trig-star-eval-quoted-annotation-3.trig @@ -0,0 +1,3 @@ +PREFIX : + +:G {:s :p <<:s2 :p2 :o2>> {| :r :z |} .} From e0d5746b26d43c9e476b995ee14304173206329f Mon Sep 17 00:00:00 2001 From: Xuguang Song Date: Sun, 18 Sep 2022 23:11:42 +1000 Subject: [PATCH 06/11] updating comments --- rdflib/plugins/parsers/trigstar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rdflib/plugins/parsers/trigstar.py b/rdflib/plugins/parsers/trigstar.py index 0c7d9bdaa..2bd700bd2 100644 --- a/rdflib/plugins/parsers/trigstar.py +++ b/rdflib/plugins/parsers/trigstar.py @@ -2720,7 +2720,7 @@ def graph(self, argstr, i): reason2 = self._reason2 self._reason2 = becauseSubGraph self._context = self._store.newGraph(graph) - 
print(self._context) + # print(self._context) while 1: i = self.skipSpace(argstr, j) if i < 0: @@ -2764,7 +2764,7 @@ def parse( ) # we're currently being handed a Graph, not a ConjunctiveGraph - print("Contextawareasdasdasdasd\n\n\n\n", graph.store.context_aware) + # print("Contextawareasdasdasdasd\n\n\n\n", graph.store.context_aware) assert graph.store.context_aware, "TriG Parser needs a context-aware store!" conj_graph = ConjunctiveGraph(store=graph.store, identifier=graph.identifier) From b8c5ef7a0a8c0fb5d21264f0c215d7bbf8b2ba33 Mon Sep 17 00:00:00 2001 From: Xuguang Song Date: Wed, 21 Sep 2022 04:58:42 +1000 Subject: [PATCH 07/11] fixing serializer logics, shift to pure ntriples serialization . --- rdflib/plugins/serializers/turtlestar.py | 75 +++---- test/rdf-star/test_Rdfstar.py | 210 ------------------ .../turtle-star/nt-ttl-star-bad-syntax-1.ttl | 1 - .../turtle-star/nt-ttl-star-bad-syntax-2.ttl | 1 - .../turtle-star/nt-ttl-star-bad-syntax-3.ttl | 1 - .../turtle-star/nt-ttl-star-bad-syntax-4.ttl | 1 - .../turtle-star/nt-ttl-star-bnode-1.ttl | 2 - .../turtle-star/nt-ttl-star-bnode-2.ttl | 2 - .../turtle-star/nt-ttl-star-nested-1.ttl | 3 - .../turtle-star/nt-ttl-star-nested-2.ttl | 3 - .../turtle-star/nt-ttl-star-syntax-1.ttl | 1 - .../turtle-star/nt-ttl-star-syntax-2.ttl | 1 - .../turtle-star/nt-ttl-star-syntax-3.ttl | 1 - .../turtle-star/nt-ttl-star-syntax-4.ttl | 1 - .../turtle-star/nt-ttl-star-syntax-5.ttl | 1 - .../turtle-star/turtle-star-annotation-1.ttl | 3 - .../turtle-star/turtle-star-annotation-2.ttl | 10 - .../turtle-star/turtle-star-syntax-bad-01.ttl | 4 - .../turtle-star/turtle-star-syntax-bad-02.ttl | 4 - .../turtle-star/turtle-star-syntax-bad-03.ttl | 4 - .../turtle-star/turtle-star-syntax-bad-04.ttl | 4 - .../turtle-star/turtle-star-syntax-bad-05.ttl | 3 - .../turtle-star/turtle-star-syntax-bad-06.ttl | 4 - .../turtle-star/turtle-star-syntax-bad-07.ttl | 3 - .../turtle-star/turtle-star-syntax-bad-08.ttl | 3 - 
.../turtle-star-syntax-bad-ann-1.ttl | 6 - .../turtle-star-syntax-bad-ann-2.ttl | 3 - .../turtle-star-syntax-basic-01.ttl | 4 - .../turtle-star-syntax-basic-02.ttl | 4 - .../turtle-star-syntax-bnode-01.ttl | 4 - .../turtle-star-syntax-bnode-02.ttl | 4 - .../turtle-star-syntax-bnode-03.ttl | 3 - .../turtle-star-syntax-compound.ttl | 11 - .../turtle-star-syntax-inside-01.ttl | 4 - .../turtle-star-syntax-inside-02.ttl | 5 - .../turtle-star-syntax-nested-01.ttl | 7 - .../turtle-star-syntax-nested-02.ttl | 5 - test_serializer_turtlestar.py | 114 +++++----- 38 files changed, 84 insertions(+), 436 deletions(-) delete mode 100644 test/rdf-star/test_Rdfstar.py delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-1.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-2.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-3.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-4.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bnode-1.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-bnode-2.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-nested-1.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-nested-2.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-syntax-1.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-syntax-2.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-syntax-3.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-syntax-4.ttl delete mode 100644 test/rdf-star/turtle-star/nt-ttl-star-syntax-5.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-annotation-1.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-annotation-2.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-01.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-02.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-03.ttl delete mode 100644 
test/rdf-star/turtle-star/turtle-star-syntax-bad-04.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-05.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-06.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-07.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-08.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-1.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-2.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-basic-01.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-basic-02.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bnode-01.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bnode-02.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-bnode-03.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-compound.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-inside-01.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-inside-02.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-nested-01.ttl delete mode 100644 test/rdf-star/turtle-star/turtle-star-syntax-nested-02.ttl diff --git a/rdflib/plugins/serializers/turtlestar.py b/rdflib/plugins/serializers/turtlestar.py index ee8dc0f11..4d3de95a0 100644 --- a/rdflib/plugins/serializers/turtlestar.py +++ b/rdflib/plugins/serializers/turtlestar.py @@ -68,41 +68,38 @@ def serialize( result_subject = "" result_object = "" + def serializing_spo(term): + if isinstance(term, rdflib.term.URIRef): + output = "<"+str(term)+">" + elif isinstance(term, rdflib.term.Literal): + output = term._literal_n3(use_plain=True) + elif isinstance(term, rdflib.term.BNode): + output = "_:"+str(term) + + return output + def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary): 
quoted_Bnode_or_not = False - if type(node) == rdflib.term.BNode: for s, p, o in g.triples((node, None, None)): if (isinstance(s, rdflib.term.BNode) & (not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): pass - # print("here", node) - if isinstance(p, rdflib.term.URIRef): - p = "<"+str(p)+">" - elif isinstance(p, rdflib.term.Literal): - p = p._literal_n3(use_plain=True) - - if isinstance(s, rdflib.term.BNode): - s = "_:"+str(s) - - if isinstance(o, rdflib.term.URIRef): - o = "<"+str(o)+">" - elif isinstance(o, rdflib.term.Literal): - o = o._literal_n3(use_plain=True) - elif isinstance(o, rdflib.term.BNode): - o = "_:"+str(o) + + p = serializing_spo(p) + s = serializing_spo(s) + o = serializing_spo(o) if not (node in blanknode_dictionary): - blanknode_dictionary[node] = [p, o] + blanknode_dictionary[node] = [[p, o]] elif ((p in blanknode_dictionary[node]) & (o in blanknode_dictionary[node])): pass else: + if not [p,o] in blanknode_dictionary[node]: + blanknode_dictionary[node].append([p,o]) - blanknode_dictionary[node].append(";") - blanknode_dictionary[node].append(p) - blanknode_dictionary[node].append(o) else: @@ -172,21 +169,10 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ subjectexpandable = ((type(subject) == rdflib.term.BNode) or (type(subject) == rdflib.term.RdfstarTriple)) objectexpandable = ((type(object) == rdflib.term.BNode) or (type(object) == rdflib.term.RdfstarTriple)) - if (isinstance(subject, rdflib.term.URIRef)): - - subject = "<"+str(subject)+">" - elif isinstance(subject, rdflib.term.BNode): - subject = "_:"+str(subject) - elif isinstance(subject, rdflib.term.Literal): - subject = subject._literal_n3(use_plain=True) + subject = serializing_spo(subject) - if (isinstance(object, rdflib.term.URIRef)): + object = serializing_spo(object) - object = "<"+str(object)+">" - elif 
isinstance(object, rdflib.term.Literal): - object = object._literal_n3(use_plain=True) - elif isinstance(object, rdflib.term.BNode): - object = "_:"+str(object) if isinstance(predicate, rdflib.term.URIRef): predicate = "<"+str(predicate)+">" @@ -475,13 +461,10 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o else: if (subject in blanknode_dictionary): if(len(blanknode_dictionary[subject])>2): - subject = "["+"".join(blanknode_dictionary[subject])+"]" - else: - subject = "[]" - else: - subject = "[]" - if subject == "[]": - + for x in blanknode_dictionary[subject]: + output ="_:" + thenode_id + x[0] + x[1]+". \n" + if output is not None: + stream.write(output.encode()) subject = " _:"+thenode_id properties = [] @@ -509,14 +492,10 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o else: if (object in blanknode_dictionary): if(len(blanknode_dictionary[object])>2): - object = "["+"".join(blanknode_dictionary[object])+"]" - else: - object = "[]" - else: - object = "[]" - - if object == "[]": - + for x in blanknode_dictionary[object]: + output = "_:" + thenode_id + x[0] + x[1] + ". 
\n" + if output is not None: + stream.write(output.encode()) object = " _:"+thenode_id properties = [] diff --git a/test/rdf-star/test_Rdfstar.py b/test/rdf-star/test_Rdfstar.py deleted file mode 100644 index 5f88e5343..000000000 --- a/test/rdf-star/test_Rdfstar.py +++ /dev/null @@ -1,210 +0,0 @@ - -import pytest - -from pathlib import Path -from shutil import copyfile -from tempfile import TemporaryDirectory - -from rdflib.exceptions import ParserError - -from rdflib import Graph -from rdflib.util import guess_format - - -from rdflib.plugin import register -from rdflib.parser import Parser - -register( - "ttls", - Parser, - "rdflib.plugins.parsers.turtlestar", - "TurtleParser", -) - -# tests should be past -def test_TurtlePositiveSyntax_subject(): - g = Graph() - assert isinstance((g.parse(data="turtle-star/turtle-star-syntax-basic-01.ttl", format = "ttls"), Graph)) - -# def test_TurtlePositiveSyntax_object(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-basic-02.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_quotedtripleinsideblankNodePropertyList(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-inside-01.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_quotedtripleinsidecollection(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-inside-02.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_nestedquotedtriplesubjectposition(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-nested-01.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_nestedquotedtripleobjectposition(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-nested-02.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_compoundforms(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-compound.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_blanknodesubject(): -# g = Graph() -# assert 
isinstance((g.parse("turtle-star/turtle-star-syntax-bnode-01.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_blanknodeobject(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bnode-02.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_blanknode(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bnode-03.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_Annotationform(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/turtle-star-annotation-1.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_Annotationexample(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/turtle-star-annotation-2.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_subjectquotedtriple(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/nt-ttl-star-syntax-1.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_objectquotedtriple(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/nt-ttl-star-syntax-2.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_subjectandobjectquotedtriples(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/nt-ttl-star-syntax-3.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_whitespaceandterms(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/nt-ttl-star-syntax-4.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_Nestednowhitespace(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/nt-ttl-star-syntax-5.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_Blanknodesubject(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/nt-ttl-star-bnode-1.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_Blanknodeobject(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/nt-ttl-star-bnode-2.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_Nestedsubjectterm(): -# g = Graph() -# assert isinstance((g.parse("turtle-star/nt-ttl-star-nested-1.ttl"), Graph)) - -# def test_TurtlePositiveSyntax_Nestedsubjectterm(): -# g = Graph() -# assert 
isinstance((g.parse("turtle-star/nt-ttl-star-nested-2.ttl"), Graph)) - -# # tests should be broken - -# def test_TurtleNegativeSyntax_Badquotedtripleliteralsubject(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/nt-ttl-star-bad-syntax-1.ttl"), Graph)) -# except: -# assert True - -# def test_TurtleNegativeSyntax_Badquotedtripleliteralsubject(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/nt-ttl-star-bad-syntax-2.ttl"), Graph)) -# except: -# assert True - -# def test_TurtleNegativeSyntax_Badquotedtripleliteralpredicate(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/nt-ttl-star-bad-syntax-3.ttl"), Graph)) -# except: -# assert True - -# def test_TurtleNegativeSyntax_Badquotedtripleblanknodepredicate(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/nt-ttl-star-bad-syntax-4.ttl"), Graph)) -# except: -# assert True - -# def test_TurtleNegativeSyntax_badquotedtripleaspredicate(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bad-01.ttl"), Graph)) -# except: -# assert True - -# def test_TurtleNegativeSyntax_badquotedtripleoutsidetriple(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bad-02.ttl"), Graph)) -# except: -# assert True - -# def test_TurtleNegativeSyntax_collectionlistinquotedtriple(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bad-03.ttl"), Graph)) -# except: -# assert True - -# def test_TurtleNegativeSyntax_badliteralinsubjectpositionofquotedtriple(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bad-04.ttl"), Graph)) -# except: -# assert True - -# def test_TurtleNegativeSyntax_blanknodeaspredicateinquotedtriple(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bad-05.ttl"), Graph)) -# except: -# assert True - -# def test_TurtlePositiveSyntax_compoundblanknodeexpression(): -# g = 
Graph() -# try: -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bad-06.ttl"), Graph)) -# except: -# assert True - -# def test_TurtlePositiveSyntax_ncompletequotetriple(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bad-07.ttl"), Graph)) -# except: -# assert True - -# def test_TurtlePositiveSyntax_overlongquotedtriple(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bad-08.ttl"), Graph)) -# except: -# assert True - -# def test_TurtlePositiveSyntax_emptyannotation(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bad-ann-1.ttl"), Graph)) -# except: -# assert True - -# def test_TurtlePositiveSyntax_tripleasannotation(): -# g = Graph() -# try: -# assert isinstance((g.parse("turtle-star/turtle-star-syntax-bad-ann-2.ttl"), Graph)) -# except: -# assert True - -if __name__ == "__main__": - pytest.main() diff --git a/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-1.ttl b/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-1.ttl deleted file mode 100644 index 401f4b861..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-1.ttl +++ /dev/null @@ -1 +0,0 @@ - << >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-2.ttl b/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-2.ttl deleted file mode 100644 index 1e47b3600..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-2.ttl +++ /dev/null @@ -1 +0,0 @@ -<< "XYZ" >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-3.ttl b/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-3.ttl deleted file mode 100644 index eaeb6f2a5..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-3.ttl +++ /dev/null @@ -1 +0,0 @@ -<< "XYZ" >> . 
diff --git a/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-4.ttl b/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-4.ttl deleted file mode 100644 index af41d2064..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-bad-syntax-4.ttl +++ /dev/null @@ -1 +0,0 @@ -<< _:label >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-bnode-1.ttl b/test/rdf-star/turtle-star/nt-ttl-star-bnode-1.ttl deleted file mode 100644 index 2d94448ce..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-bnode-1.ttl +++ /dev/null @@ -1,2 +0,0 @@ -_:b0 . -<< _:b0 >> "ABC" . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-bnode-2.ttl b/test/rdf-star/turtle-star/nt-ttl-star-bnode-2.ttl deleted file mode 100644 index 2dc337a93..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-bnode-2.ttl +++ /dev/null @@ -1,2 +0,0 @@ - _:b1 . -<< _:b1 >> "456"^^ . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-nested-1.ttl b/test/rdf-star/turtle-star/nt-ttl-star-nested-1.ttl deleted file mode 100644 index d6a50cbee..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-nested-1.ttl +++ /dev/null @@ -1,3 +0,0 @@ - . -<< >> . -<< << >> >> "1"^^ . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-nested-2.ttl b/test/rdf-star/turtle-star/nt-ttl-star-nested-2.ttl deleted file mode 100644 index 7a8d3131b..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-nested-2.ttl +++ /dev/null @@ -1,3 +0,0 @@ - . - << >> . -<< << >> >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-syntax-1.ttl b/test/rdf-star/turtle-star/nt-ttl-star-syntax-1.ttl deleted file mode 100644 index 7f2be99c3..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-syntax-1.ttl +++ /dev/null @@ -1 +0,0 @@ -<< >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-syntax-2.ttl b/test/rdf-star/turtle-star/nt-ttl-star-syntax-2.ttl deleted file mode 100644 index 5aa2cb200..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-syntax-2.ttl +++ /dev/null @@ -1 +0,0 @@ - << >> . 
diff --git a/test/rdf-star/turtle-star/nt-ttl-star-syntax-3.ttl b/test/rdf-star/turtle-star/nt-ttl-star-syntax-3.ttl deleted file mode 100644 index 16b9420af..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-syntax-3.ttl +++ /dev/null @@ -1 +0,0 @@ -<< >> << >> . diff --git a/test/rdf-star/turtle-star/nt-ttl-star-syntax-4.ttl b/test/rdf-star/turtle-star/nt-ttl-star-syntax-4.ttl deleted file mode 100644 index 6dc18c359..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-syntax-4.ttl +++ /dev/null @@ -1 +0,0 @@ -<<>><<>>. diff --git a/test/rdf-star/turtle-star/nt-ttl-star-syntax-5.ttl b/test/rdf-star/turtle-star/nt-ttl-star-syntax-5.ttl deleted file mode 100644 index 041b85739..000000000 --- a/test/rdf-star/turtle-star/nt-ttl-star-syntax-5.ttl +++ /dev/null @@ -1 +0,0 @@ -<<<<>><<>>>><<<<>><<>>>>. \ No newline at end of file diff --git a/test/rdf-star/turtle-star/turtle-star-annotation-1.ttl b/test/rdf-star/turtle-star/turtle-star-annotation-1.ttl deleted file mode 100644 index fdba9df1d..000000000 --- a/test/rdf-star/turtle-star/turtle-star-annotation-1.ttl +++ /dev/null @@ -1,3 +0,0 @@ -PREFIX : - -:s :p :o {| :r :z |} . diff --git a/test/rdf-star/turtle-star/turtle-star-annotation-2.ttl b/test/rdf-star/turtle-star/turtle-star-annotation-2.ttl deleted file mode 100644 index 06ef8eaf4..000000000 --- a/test/rdf-star/turtle-star/turtle-star-annotation-2.ttl +++ /dev/null @@ -1,10 +0,0 @@ -PREFIX : -PREFIX xsd: - -:s :p :o {| :source [ :graph ; - :date "2020-01-20"^^xsd:date - ] ; - :source [ :graph ; - :date "2020-12-31"^^xsd:date - ] - |} . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-01.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-01.ttl deleted file mode 100644 index 4a41e436c..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bad-01.ttl +++ /dev/null @@ -1,4 +0,0 @@ -PREFIX : - -:s :p :o . -:x <<:s :p :o>> 123 . 
diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-02.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-02.ttl deleted file mode 100644 index 8a1fbb388..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bad-02.ttl +++ /dev/null @@ -1,4 +0,0 @@ -PREFIX : - -:s :p :o . -<<:s :p :o>> . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-03.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-03.ttl deleted file mode 100644 index 3952b24b6..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bad-03.ttl +++ /dev/null @@ -1,4 +0,0 @@ -PREFIX : - -:s :p ("abc") . -<<:s :p ("abc") >> :q 123 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-04.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-04.ttl deleted file mode 100644 index a69b6469d..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bad-04.ttl +++ /dev/null @@ -1,4 +0,0 @@ -PREFIX : - -:s :p :o . -<<3 :p :o >> :q :z . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-05.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-05.ttl deleted file mode 100644 index b1a45960a..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bad-05.ttl +++ /dev/null @@ -1,3 +0,0 @@ -PREFIX : - -<<:s [] :o>> :q 123 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-06.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-06.ttl deleted file mode 100644 index 687ef35c8..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bad-06.ttl +++ /dev/null @@ -1,4 +0,0 @@ -PREFIX : - - -<<:s :p [ :p1 :o1 ] >> :q 123 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-07.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-07.ttl deleted file mode 100644 index 329e6d39b..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bad-07.ttl +++ /dev/null @@ -1,3 +0,0 @@ -PREFIX : - -:s :p << :p :r >> . 
diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-08.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-08.ttl deleted file mode 100644 index 72ceff2e2..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bad-08.ttl +++ /dev/null @@ -1,3 +0,0 @@ -PREFIX : - -:s :p << :g :s :p :o >> . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-1.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-1.ttl deleted file mode 100644 index 7f654f4eb..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-1.ttl +++ /dev/null @@ -1,6 +0,0 @@ -PREFIX : - -SELECT * { - :s :p :o {| |} . -} - diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-2.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-2.ttl deleted file mode 100644 index 1abed88fb..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bad-ann-2.ttl +++ /dev/null @@ -1,3 +0,0 @@ -PREFIX : - -:a :b :c {| :s :p :o |} . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-basic-01.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-basic-01.ttl deleted file mode 100644 index 3874f92ef..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-basic-01.ttl +++ /dev/null @@ -1,4 +0,0 @@ -PREFIX : - -:s :p :o . -<<:s :p :o>> :q 123 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-basic-02.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-basic-02.ttl deleted file mode 100644 index a2b95e77d..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-basic-02.ttl +++ /dev/null @@ -1,4 +0,0 @@ -PREFIX : - -:s :p :o . -:x :p <<:s :p :o>> . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bnode-01.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bnode-01.ttl deleted file mode 100644 index 6591a66e1..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bnode-01.ttl +++ /dev/null @@ -1,4 +0,0 @@ -PREFIX : - -_:a :p :o . -<<_:a :p :o >> :q 456 . 
diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bnode-02.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bnode-02.ttl deleted file mode 100644 index ac99ad739..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bnode-02.ttl +++ /dev/null @@ -1,4 +0,0 @@ -PREFIX : - -:s :p _:a . -<<:s :p _:a >> :q 456 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-bnode-03.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-bnode-03.ttl deleted file mode 100644 index 95f8cf5f2..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-bnode-03.ttl +++ /dev/null @@ -1,3 +0,0 @@ -PREFIX : - -<<[] :p [] >> :q :z . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-compound.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-compound.ttl deleted file mode 100644 index df83d9704..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-compound.ttl +++ /dev/null @@ -1,11 +0,0 @@ -PREFIX : - - -:x :r :z . -:a :b :c . -<<:a :b :c>> :r :z . -<<:x :r :z >> :p <<:a :b :c>> . - -<< <<:x :r :z >> :p <<:a :b :c>> >> - :q -<< <<:x :r :z >> :p <<:a :b :c>> >> . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-inside-01.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-inside-01.ttl deleted file mode 100644 index ff87a146f..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-inside-01.ttl +++ /dev/null @@ -1,4 +0,0 @@ -PREFIX : - -:s :p :o . -[ :q <<:s :p :o>> ] :b :c . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-inside-02.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-inside-02.ttl deleted file mode 100644 index a6f82e027..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-inside-02.ttl +++ /dev/null @@ -1,5 +0,0 @@ -PREFIX : - -:s :p :o1 . -:s :p :o2 . -( <<:s :p :o1>> ( <<:s :p :o1>> <<:s :p :o2>> ) ) :q 123 . 
diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-nested-01.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-nested-01.ttl deleted file mode 100644 index 93a936cab..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-nested-01.ttl +++ /dev/null @@ -1,7 +0,0 @@ -PREFIX : - -:s :p :o . - -<<:s :p :o >> :r :z . - -<< <<:s :p :o >> :r :z >> :q 1 . diff --git a/test/rdf-star/turtle-star/turtle-star-syntax-nested-02.ttl b/test/rdf-star/turtle-star/turtle-star-syntax-nested-02.ttl deleted file mode 100644 index aec49ef4a..000000000 --- a/test/rdf-star/turtle-star/turtle-star-syntax-nested-02.ttl +++ /dev/null @@ -1,5 +0,0 @@ -PREFIX : - -:s :p :o . -:a :q <<:s :p :o >> . -<< :a :q <<:s :p :o >>>> :r :z . diff --git a/test_serializer_turtlestar.py b/test_serializer_turtlestar.py index f7d08533b..695f55a29 100644 --- a/test_serializer_turtlestar.py +++ b/test_serializer_turtlestar.py @@ -19,45 +19,45 @@ from rdflib.namespace import RDF from rdflib.namespace import FOAF -g = Graph() -g.parse(data="test/turtle-star/turtle-star-syntax-basic-01.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse(data="test/turtle-star/turtle-star-syntax-basic-01.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/turtle-star-syntax-basic-02.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/turtle-star-syntax-basic-02.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/turtle-star-syntax-inside-01.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/turtle-star-syntax-inside-01.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/turtle-star-syntax-inside-02.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# 
g.parse("test/turtle-star/turtle-star-syntax-inside-02.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/turtle-star-syntax-nested-01.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/turtle-star-syntax-nested-01.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/turtle-star-syntax-nested-02.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/turtle-star-syntax-nested-02.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/turtle-star-syntax-compound.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/turtle-star-syntax-compound.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/turtle-star-syntax-bnode-01.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/turtle-star-syntax-bnode-01.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/turtle-star-syntax-bnode-02.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/turtle-star-syntax-bnode-02.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/turtle-star-syntax-bnode-03.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/turtle-star-syntax-bnode-03.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) g = Graph() g.parse("test/turtle-star/turtle-star-annotation-1.ttl", format = "ttls") @@ -67,38 +67,38 @@ g.parse("test/turtle-star/turtle-star-annotation-2.ttl", format = "ttls") print(g.serialize(format = "ttlstar")) -g = Graph() 
-g.parse("test/turtle-star/nt-ttl-star-syntax-1.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/nt-ttl-star-syntax-1.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/nt-ttl-star-syntax-2.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/nt-ttl-star-syntax-2.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/nt-ttl-star-syntax-3.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/nt-ttl-star-syntax-3.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/nt-ttl-star-syntax-4.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/nt-ttl-star-syntax-4.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/nt-ttl-star-syntax-5.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/nt-ttl-star-syntax-5.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/nt-ttl-star-bnode-1.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/nt-ttl-star-bnode-1.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/nt-ttl-star-bnode-2.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/nt-ttl-star-bnode-2.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/nt-ttl-star-nested-1.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/nt-ttl-star-nested-1.ttl", format = "ttls") +# 
print(g.serialize(format = "ttlstar")) -g = Graph() -g.parse("test/turtle-star/nt-ttl-star-nested-2.ttl", format = "ttls") -print(g.serialize(format = "ttlstar")) +# g = Graph() +# g.parse("test/turtle-star/nt-ttl-star-nested-2.ttl", format = "ttls") +# print(g.serialize(format = "ttlstar")) From 0631a67b979b02beecf165c1a134bbd6ce325caf Mon Sep 17 00:00:00 2001 From: Xuguang Song Date: Wed, 21 Sep 2022 05:23:22 +1000 Subject: [PATCH 08/11] fixing turtle-star serializer --- rdflib/plugins/serializers/turtlestar.py | 47 ++++++---- test_serializer_turtlestar.py | 114 +++++++++++------------ 2 files changed, 87 insertions(+), 74 deletions(-) diff --git a/rdflib/plugins/serializers/turtlestar.py b/rdflib/plugins/serializers/turtlestar.py index 4d3de95a0..16c87dd29 100644 --- a/rdflib/plugins/serializers/turtlestar.py +++ b/rdflib/plugins/serializers/turtlestar.py @@ -68,16 +68,6 @@ def serialize( result_subject = "" result_object = "" - def serializing_spo(term): - if isinstance(term, rdflib.term.URIRef): - output = "<"+str(term)+">" - elif isinstance(term, rdflib.term.Literal): - output = term._literal_n3(use_plain=True) - elif isinstance(term, rdflib.term.BNode): - output = "_:"+str(term) - - return output - def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary): quoted_Bnode_or_not = False if type(node) == rdflib.term.BNode: @@ -85,9 +75,20 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ if (isinstance(s, rdflib.term.BNode) & (not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): pass - p = serializing_spo(p) - s = serializing_spo(s) - o = serializing_spo(o) + if isinstance(p, rdflib.term.URIRef): + p = "<"+str(p)+">" + elif isinstance(p, rdflib.term.Literal): + p = p._literal_n3(use_plain=True) + + if isinstance(s, 
rdflib.term.BNode): + s = "_:"+str(s) + + if isinstance(o, rdflib.term.URIRef): + o = "<"+str(o)+">" + elif isinstance(o, rdflib.term.Literal): + o = o._literal_n3(use_plain=True) + elif isinstance(o, rdflib.term.BNode): + o = "_:"+str(o) if not (node in blanknode_dictionary): @@ -169,9 +170,21 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ subjectexpandable = ((type(subject) == rdflib.term.BNode) or (type(subject) == rdflib.term.RdfstarTriple)) objectexpandable = ((type(object) == rdflib.term.BNode) or (type(object) == rdflib.term.RdfstarTriple)) - subject = serializing_spo(subject) + if (isinstance(subject, rdflib.term.URIRef)): - object = serializing_spo(object) + subject = "<"+str(subject)+">" + elif isinstance(subject, rdflib.term.BNode): + subject = "_:"+str(subject) + elif isinstance(subject, rdflib.term.Literal): + subject = subject._literal_n3(use_plain=True) + + if (isinstance(object, rdflib.term.URIRef)): + + object = "<"+str(object)+">" + elif isinstance(object, rdflib.term.Literal): + object = object._literal_n3(use_plain=True) + elif isinstance(object, rdflib.term.BNode): + object = "_:"+str(object) if isinstance(predicate, rdflib.term.URIRef): predicate = "<"+str(predicate)+">" @@ -192,7 +205,6 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ if ((not subjectexpandable) and (not objectexpandable)): - dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" if node not in dictionary: @@ -282,6 +294,7 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o elif isinstance(subject, rdflib.term.Literal): subject = subject._literal_n3(use_plain=True) elif isinstance(subject, rdflib.term.RdfstarTriple): + print("rdfstartriple", dictionary) subject = dictionary[subject] elif isinstance(subject, rdflib.term.BNode): @@ -355,7 +368,7 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o 
result_subject.insert(0, "(") result_subject.append(")") elif subject in blanknode_dictionary: - subject = "["+"".join(blanknode_dictionary[subject])+"]" + subject = "_:" + thenode_id elif ifquotedBnode: pass else: diff --git a/test_serializer_turtlestar.py b/test_serializer_turtlestar.py index 695f55a29..f7d08533b 100644 --- a/test_serializer_turtlestar.py +++ b/test_serializer_turtlestar.py @@ -19,45 +19,45 @@ from rdflib.namespace import RDF from rdflib.namespace import FOAF -# g = Graph() -# g.parse(data="test/turtle-star/turtle-star-syntax-basic-01.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse(data="test/turtle-star/turtle-star-syntax-basic-01.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/turtle-star-syntax-basic-02.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-basic-02.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/turtle-star-syntax-inside-01.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-inside-01.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/turtle-star-syntax-inside-02.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-inside-02.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/turtle-star-syntax-nested-01.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-nested-01.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/turtle-star-syntax-nested-02.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() 
+g.parse("test/turtle-star/turtle-star-syntax-nested-02.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/turtle-star-syntax-compound.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-compound.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/turtle-star-syntax-bnode-01.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-bnode-01.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/turtle-star-syntax-bnode-02.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-bnode-02.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/turtle-star-syntax-bnode-03.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/turtle-star-syntax-bnode-03.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) g = Graph() g.parse("test/turtle-star/turtle-star-annotation-1.ttl", format = "ttls") @@ -67,38 +67,38 @@ g.parse("test/turtle-star/turtle-star-annotation-2.ttl", format = "ttls") print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/nt-ttl-star-syntax-1.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-syntax-1.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/nt-ttl-star-syntax-2.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-syntax-2.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/nt-ttl-star-syntax-3.ttl", format = 
"ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-syntax-3.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/nt-ttl-star-syntax-4.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-syntax-4.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/nt-ttl-star-syntax-5.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-syntax-5.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/nt-ttl-star-bnode-1.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-bnode-1.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/nt-ttl-star-bnode-2.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-bnode-2.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/nt-ttl-star-nested-1.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-nested-1.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) -# g = Graph() -# g.parse("test/turtle-star/nt-ttl-star-nested-2.ttl", format = "ttls") -# print(g.serialize(format = "ttlstar")) +g = Graph() +g.parse("test/turtle-star/nt-ttl-star-nested-2.ttl", format = "ttls") +print(g.serialize(format = "ttlstar")) From 68bf86e04e868100d1961b8e7abe729f9c6945a5 Mon Sep 17 00:00:00 2001 From: Xuguang Song Date: Thu, 22 Sep 2022 05:12:05 +1000 Subject: [PATCH 09/11] improving serializing efficiency in trigstar and turtlestar serializer --- rdflib/plugins/parsers/trigstar.py | 5 +- rdflib/plugins/parsers/turtlestar.py 
| 1 + rdflib/plugins/serializers/trigstar.py | 210 ++++------------------- rdflib/plugins/serializers/turtlestar.py | 184 +++----------------- test/test_parser_ntriplesstar.py | 10 -- 5 files changed, 57 insertions(+), 353 deletions(-) diff --git a/rdflib/plugins/parsers/trigstar.py b/rdflib/plugins/parsers/trigstar.py index 2bd700bd2..b9370b145 100644 --- a/rdflib/plugins/parsers/trigstar.py +++ b/rdflib/plugins/parsers/trigstar.py @@ -631,6 +631,7 @@ def triples(self, var): vblist.append([tri]) else: tri = tri.replace(";", "") + tri = tri.replace(" ", "") if not (tri in assertedtriplelist): assertedtriplelist.append(tri) for x in var.children: @@ -756,12 +757,12 @@ def expand_to_rdfstar(x): if ":G { }\n" in output_tree: output_tree = output_tree.replace(":G { }\n", ":G {") output_tree = output_tree+ "}" - + # print("test output tree", output_tree) tree = trig_lark.parse(output_tree) at = FindVariables().visit(tree) - + # print("asserted, quoted", assertedtriplelist, quotation_list) for y in vblist: for element_index in range(0, len(y)): diff --git a/rdflib/plugins/parsers/turtlestar.py b/rdflib/plugins/parsers/turtlestar.py index 78eb7709e..584583244 100644 --- a/rdflib/plugins/parsers/turtlestar.py +++ b/rdflib/plugins/parsers/turtlestar.py @@ -631,6 +631,7 @@ def triples(self, var): else: tri = tri.replace(";", "") + tri = tri.replace(" ", "") if not (tri in assertedtriplelist): assertedtriplelist.append(tri) for x in var.children: diff --git a/rdflib/plugins/serializers/trigstar.py b/rdflib/plugins/serializers/trigstar.py index 36451c369..39c368ac3 100644 --- a/rdflib/plugins/serializers/trigstar.py +++ b/rdflib/plugins/serializers/trigstar.py @@ -27,18 +27,15 @@ class TrigstarSerializer(Serializer): def __init__(self, store: Union[Graph, ConjunctiveGraph]): self.default_context: Optional[Node] - # print("init", list(store.contexts())) if store.context_aware: if TYPE_CHECKING: assert isinstance(store, ConjunctiveGraph) self.contexts = list(store.contexts()) - 
# print("sadasd", [store]) self.default_context = store.default_context.identifier if store.default_context: self.contexts.append(store.default_context) else: self.contexts = [store] - # print("asdasdas", store.default_context.identifier) self.default_context = None Serializer.__init__(self, store) @@ -95,15 +92,14 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ if not (node in blanknode_dictionary): - blanknode_dictionary[node] = [p, o] + blanknode_dictionary[node] = [[p, o]] elif ((p in blanknode_dictionary[node]) & (o in blanknode_dictionary[node])): pass else: - blanknode_dictionary[node].append(";") - blanknode_dictionary[node].append(p) - blanknode_dictionary[node].append(o) + if not [p,o] in blanknode_dictionary[node]: + blanknode_dictionary[node].append([p,o]) else: @@ -119,7 +115,7 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p) and (not isinstance(o, rdflib.term.RdfstarTriple))): properties.append("(") - expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) + update_dictionary_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p)and (not isinstance(o, rdflib.term.RdfstarTriple))): properties.append(")") @@ -217,133 +213,10 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ pass return properties, collection_or_not, quoted_Bnode_or_not, dictionary - def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_or_not, quoted_Bnode_or_not): - - quoted_Bnode_or_not = False - if type(node) == rdflib.term.BNode: - for s, p, o in g.triples((node, None, None)): - if (isinstance(s, rdflib.term.BNode) & (not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, 
rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): - pass - else: - - - if ("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p) or ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): - collection_or_not = True - quoted_Bnode_or_not = False - - if o in dictionary: - properties.append(dictionary[o]) - - elif not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil" in o): - - if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p) and (not isinstance(o, rdflib.term.RdfstarTriple))): - properties.append("(") - - expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) - - if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p)and (not isinstance(o, rdflib.term.RdfstarTriple))): - properties.append(")") - - else: - - if ((not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): - pass - - else: - collection_or_not = False - quoted_Bnode_or_not = False - - if (isinstance(p, rdflib.term.URIRef)): - p = "<"+str(p)+">" - elif isinstance(p, rdflib.term.Literal): - p = p._literal_n3(use_plain=True) - - - pass - properties.append(p) - if o in dictionary: - properties.append(dictionary[o]) - - else: - - expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) - - if type(node) == rdflib.term.RdfstarTriple: - - collection_or_not = False - quoted_Bnode_or_not = True - if node in dictionary: - - properties.append(dictionary[node]) - - else: - - subject = node.subject() - predicate = node.predicate() - object = node.object() - if subject in dictionary: - - subject = dictionary[subject] - if object in dictionary: - - object = dictionary[object] - subjectexpandable = ((type(subject) == rdflib.term.BNode) or (type(subject) == rdflib.term.RdfstarTriple)) - objectexpandable = ((type(object) == rdflib.term.BNode) or (type(object) == 
rdflib.term.RdfstarTriple)) - - if (isinstance(subject, rdflib.term.URIRef)): - - subject = "<"+str(subject)+">" - elif isinstance(subject, rdflib.term.Literal): - subject = subject._literal_n3(use_plain=True) - elif isinstance(subject, rdflib.term.RdfstarTriple): - subject = dictionary[subject] - elif isinstance(subject, rdflib.term.BNode): - - if subject in blanknode_dictionary: - subject = "["+"".join(blanknode_dictionary[subject])+"]" - else: - - subject = "_:"+str(subject) - - - if (isinstance(object, rdflib.term.URIRef)): - - object = "<"+str(object)+">" - elif isinstance(object, rdflib.term.Literal): - object = object._literal_n3(use_plain=True) - elif isinstance(object, rdflib.term.RdfstarTriple): - object = dictionary[object] - elif isinstance(object, rdflib.term.BNode): - if object in blanknode_dictionary: - object = "["+"".join(blanknode_dictionary[object])+"]" - else: - object = "_:"+str(object) - - if isinstance(predicate, rdflib.term.URIRef): - predicate = "<"+str(predicate)+">" - - - - if ((not subjectexpandable) and (not objectexpandable)): - - - dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" - - if node not in dictionary: - - dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" - properties.append("<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>") - - else: - properties.append(dictionary[node]) - - return properties, collection_or_not, quoted_Bnode_or_not, dictionary - # this loop is for updating the quoted triple dictionary and blank node dictionary for g in self.contexts: for s,p,o in g.triples((None, None, None)): - if (isinstance(s, rdflib.term.BNode) & (isinstance(o, rdflib.term.BNode) or isinstance(o, rdflib.term.RdfstarTriple) or isinstance(p, rdflib.term.BNode) or isinstance(p, rdflib.term.RdfstarTriple))): pass elif("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p or "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): @@ 
-371,7 +244,7 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o result_subject.insert(0, "(") result_subject.append(")") elif subject in blanknode_dictionary: - subject = "["+"".join(blanknode_dictionary[subject])+"]" + subject = "_:" + thenode_id elif ifquotedBnode: pass else: @@ -381,8 +254,6 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o subject = "".join(result_subject) else: - - # else: subject = "[]" if subject == "[]": @@ -439,10 +310,11 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o re1 = True if re1 or re2: - if (isinstance(s, rdflib.term.BNode) & (isinstance(o, rdflib.term.BNode) or isinstance(o, rdflib.term.RdfstarTriple) or isinstance(p, rdflib.term.BNode) or isinstance(p, rdflib.term.RdfstarTriple))): - pass - elif("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p or "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + + if("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p or "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + pass + else: subject = s @@ -460,33 +332,19 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o elif isinstance(subject, rdflib.term.Literal): subject = subject._literal_n3(use_plain=True) elif (isinstance(subject, rdflib.term.BNode) or isinstance(subject, rdflib.term.RdfstarTriple)): - thenode_id = str(subject) - - result_subject, ifcollection, ifquotedBnode, d = expand_Bnode_and_RdfstarTriple(subject,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not) - - if (not len(result_subject) == 0): - if ifcollection == True: - result_subject.insert(0, "(") - result_subject.append(")") - - elif ifquotedBnode: - pass - else: - - result_subject.insert(0, "[") - result_subject.append("]") - subject = "".join(result_subject) + if isinstance(subject, rdflib.term.RdfstarTriple): + subject = dictionary[subject] else: if (subject in blanknode_dictionary): 
if(len(blanknode_dictionary[subject])>2): - subject = "["+"".join(blanknode_dictionary[subject])+"]" - else: - subject = "[]" - else: - subject = "[]" - if subject == "[]": + for x in blanknode_dictionary[subject]: + subject_node = "_:"+str(subject) + output =subject_node + x[0] + x[1]+". \n" + if output is not None: + stream.write(output.encode()) + + subject = " _:"+subject - subject = " _:"+thenode_id properties = [] @@ -496,38 +354,28 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o object = object._literal_n3(use_plain=True) elif (isinstance(object, rdflib.term.BNode) or isinstance(object, rdflib.term.RdfstarTriple)): thenode_id = str(object) - result_object, ifcollection, ifquotedBnode, d = expand_Bnode_and_RdfstarTriple(object,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not) - - - if (not len(result_object) == 0): - if ifcollection == True: - result_object.insert(0, "(") - result_object.append(")") - - elif ifquotedBnode: - pass - else: - result_object.insert(0, "[") - result_object.append("]") - object = "".join(result_object) + if isinstance(object, rdflib.term.RdfstarTriple): + object = dictionary[object] else: if (object in blanknode_dictionary): if(len(blanknode_dictionary[object])>2): - object = "["+"".join(blanknode_dictionary[object])+"]" - else: - object = "[]" - else: - object = "[]" + for x in blanknode_dictionary[object]: + object_node = "_:" + str(object) + output = object_node + x[0] + x[1] + ". 
\n" + print("what", output) + if output is not None: - if object == "[]": + stream.write(output.encode()) + + object = " _:"+object - object = " _:"+thenode_id properties = [] if(isinstance(predicate, rdflib.term.URIRef)): predicate = "<"+str(predicate)+">" output = subject+" "+predicate+" "+object+" <"+str(g.identifier)+"> "" ."+"\n" + if output is not None: stream.write(output.encode()) diff --git a/rdflib/plugins/serializers/turtlestar.py b/rdflib/plugins/serializers/turtlestar.py index 16c87dd29..7a181c979 100644 --- a/rdflib/plugins/serializers/turtlestar.py +++ b/rdflib/plugins/serializers/turtlestar.py @@ -117,7 +117,7 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p) and (not isinstance(o, rdflib.term.RdfstarTriple))): properties.append("(") - expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) + update_dictionary_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p)and (not isinstance(o, rdflib.term.RdfstarTriple))): properties.append(")") @@ -216,126 +216,6 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ pass return properties, collection_or_not, quoted_Bnode_or_not, dictionary - def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_or_not, quoted_Bnode_or_not): - - quoted_Bnode_or_not = False - if type(node) == rdflib.term.BNode: - for s, p, o in g.triples((node, None, None)): - if (isinstance(s, rdflib.term.BNode) & (not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): - pass - else: - - if ("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p) or ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): - 
collection_or_not = True - quoted_Bnode_or_not = False - - if o in dictionary: - properties.append(dictionary[o]) - - elif not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil" in o): - - if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p) and (not isinstance(o, rdflib.term.RdfstarTriple))): - properties.append("(") - - expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) - - if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p)and (not isinstance(o, rdflib.term.RdfstarTriple))): - properties.append(")") - - else: - - if ((not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): - pass - - else: - collection_or_not = False - quoted_Bnode_or_not = False - if (isinstance(p, rdflib.term.URIRef)): - p = "<"+str(p)+">" - elif isinstance(p, rdflib.term.Literal): - p = p._literal_n3(use_plain=True) - - - pass - properties.append(p) - if o in dictionary: - properties.append(dictionary[o]) - - else: - - expand_Bnode_and_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not) - - - if type(node) == rdflib.term.RdfstarTriple: - - collection_or_not = False - quoted_Bnode_or_not = True - if node in dictionary: - - properties.append(dictionary[node]) - - else: - - subject = node.subject() - predicate = node.predicate() - object = node.object() - if subject in dictionary: - - subject = dictionary[subject] - if object in dictionary: - - object = dictionary[object] - subjectexpandable = ((type(subject) == rdflib.term.BNode) or (type(subject) == rdflib.term.RdfstarTriple)) - objectexpandable = ((type(object) == rdflib.term.BNode) or (type(object) == rdflib.term.RdfstarTriple)) - - if (isinstance(subject, rdflib.term.URIRef)): - - subject = "<"+str(subject)+">" - elif isinstance(subject, rdflib.term.Literal): - subject = subject._literal_n3(use_plain=True) - elif 
isinstance(subject, rdflib.term.RdfstarTriple): - print("rdfstartriple", dictionary) - subject = dictionary[subject] - elif isinstance(subject, rdflib.term.BNode): - - if subject in blanknode_dictionary: - subject = "["+"".join(blanknode_dictionary[subject])+"]" - else: - subject = "_:"+str(subject) - - - if (isinstance(object, rdflib.term.URIRef)): - - object = "<"+str(object)+">" - elif isinstance(object, rdflib.term.Literal): - object = object._literal_n3(use_plain=True) - elif isinstance(object, rdflib.term.RdfstarTriple): - object = dictionary[object] - elif isinstance(object, rdflib.term.BNode): - if object in blanknode_dictionary: - object = "["+"".join(blanknode_dictionary[object])+"]" - else: - object = "_:"+str(object) - - if isinstance(predicate, rdflib.term.URIRef): - predicate = "<"+str(predicate)+">" - - - if ((not subjectexpandable) and (not objectexpandable)): - - - dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" - - if node not in dictionary: - - dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" - properties.append("<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>") - - else: - properties.append(dictionary[node]) - - return properties, collection_or_not, quoted_Bnode_or_not, dictionary - # this loop is for updating the quoted triple dictionary and blank node dictionary for g in self.contexts: @@ -427,6 +307,7 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o re1 = False re2 = False if len(blanknode_dictionary[s]) < 4: + re2 = True else: @@ -434,10 +315,11 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o re1 = True if re1 or re2: - if (isinstance(s, rdflib.term.BNode) & (isinstance(o, rdflib.term.BNode) or isinstance(o, rdflib.term.RdfstarTriple) or isinstance(p, rdflib.term.BNode) or isinstance(p, rdflib.term.RdfstarTriple))): - pass - 
elif("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p or "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + + if("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p or "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): + pass + else: subject = s @@ -455,30 +337,19 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o elif isinstance(subject, rdflib.term.Literal): subject = subject._literal_n3(use_plain=True) elif (isinstance(subject, rdflib.term.BNode) or isinstance(subject, rdflib.term.RdfstarTriple)): - thenode_id = str(subject) - - result_subject, ifcollection, ifquotedBnode, d = expand_Bnode_and_RdfstarTriple(subject,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not) - - if (not len(result_subject) == 0): - if ifcollection == True: - result_subject.insert(0, "(") - result_subject.append(")") - - elif ifquotedBnode: - pass - else: - - result_subject.insert(0, "[") - result_subject.append("]") - subject = "".join(result_subject) + if isinstance(subject, rdflib.term.RdfstarTriple): + subject = dictionary[subject] else: if (subject in blanknode_dictionary): if(len(blanknode_dictionary[subject])>2): for x in blanknode_dictionary[subject]: - output ="_:" + thenode_id + x[0] + x[1]+". \n" + subject_node = "_:"+str(subject) + output =subject_node + x[0] + x[1]+". 
\n" if output is not None: stream.write(output.encode()) - subject = " _:"+thenode_id + + subject = " _:"+subject + properties = [] @@ -488,38 +359,31 @@ def expand_Bnode_and_RdfstarTriple(node, g, dictionary, properties, collection_o object = object._literal_n3(use_plain=True) elif (isinstance(object, rdflib.term.BNode) or isinstance(object, rdflib.term.RdfstarTriple)): thenode_id = str(object) - result_object, ifcollection, ifquotedBnode, d = expand_Bnode_and_RdfstarTriple(object,g,dictionary,properties,collection_or_not, quoted_Bnode_or_not) - - - if (not len(result_object) == 0): - if ifcollection == True: - result_object.insert(0, "(") - result_object.append(")") - - elif ifquotedBnode: - pass - else: - result_object.insert(0, "[") - result_object.append("]") - object = "".join(result_object) + if isinstance(object, rdflib.term.RdfstarTriple): + object = dictionary[object] else: if (object in blanknode_dictionary): if(len(blanknode_dictionary[object])>2): for x in blanknode_dictionary[object]: - output = "_:" + thenode_id + x[0] + x[1] + ". \n" + object_node = "_:" + str(object) + output = object_node + x[0] + x[1] + ". 
\n" + print("what", output) if output is not None: + stream.write(output.encode()) - object = " _:"+thenode_id + + object = " _:"+object + properties = [] if(isinstance(predicate, rdflib.term.URIRef)): predicate = "<"+str(predicate)+">" - output = subject+" "+predicate+" "+object+" ."+"\n" + output = subject+" "+predicate+" "+object+ " ."+"\n" + if output is not None: stream.write(output.encode()) - def _iri_or_bn(self, i_): if isinstance(i_, URIRef): return f"{i_}" diff --git a/test/test_parser_ntriplesstar.py b/test/test_parser_ntriplesstar.py index f4fc898fc..886134496 100644 --- a/test/test_parser_ntriplesstar.py +++ b/test/test_parser_ntriplesstar.py @@ -14,13 +14,6 @@ from rdflib.plugin import register from rdflib.parser import Parser -register( - "ntstar", - Parser, - "rdflib.plugins.parsers.ntriples-star", - "NtriplesStarParser", -) - # tests should be past def test_NtriplesPositiveSyntax_subject(): g = Graph() @@ -46,9 +39,6 @@ def test_NtriplesPositiveSyntax_nestedquotedtriplesubjectposition(): def test_NtriplesPositiveSyntax_nestedquotedtripleobjectposition(): g = Graph() assert isinstance(g.parse("ntriples-star/ntriples-star-bnode-1.nt", format = "ntstar"), Graph) - print(g.serialize()) - # for s, p, o, g in g.quads((None, RDF.type, None, None)): - # print(s) def test_NtriplesPositiveSyntax_compoundforms(): g = Graph() From 29b020c6be0762ca65b2bea6fc3a14364d61b0c5 Mon Sep 17 00:00:00 2001 From: Xuguang Song Date: Thu, 22 Sep 2022 05:51:39 +1000 Subject: [PATCH 10/11] improving serializing efficiency in trigstar and turtlestar serializer --- rdflib/plugins/serializers/trigstar.py | 16 ++-------------- rdflib/plugins/serializers/turtlestar.py | 14 +------------- 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/rdflib/plugins/serializers/trigstar.py b/rdflib/plugins/serializers/trigstar.py index 39c368ac3..86032b2bc 100644 --- a/rdflib/plugins/serializers/trigstar.py +++ b/rdflib/plugins/serializers/trigstar.py @@ -64,11 +64,13 @@ def 
serialize( raise Exception( "Hextuple serialization can't (yet) handle formula-aware stores" ) + dictionary = {} blanknode_dictionary = {} result_subject = "" result_object = "" output = "" + def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary): quoted_Bnode_or_not = False if type(node) == rdflib.term.BNode: @@ -100,9 +102,7 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ if not [p,o] in blanknode_dictionary[node]: blanknode_dictionary[node].append([p,o]) - else: - if ("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p) or ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): collection_or_not = True quoted_Bnode_or_not = False @@ -124,40 +124,29 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ if ((not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): pass - else: collection_or_not = False quoted_Bnode_or_not = False - if (isinstance(p, rdflib.term.URIRef)): p = "<"+str(p)+">" elif isinstance(p, rdflib.term.Literal): p = p._literal_n3(use_plain=True) - - pass properties.append(p) if o in dictionary: properties.append(dictionary[o]) - else: - update_dictionary_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) - if type(node) == rdflib.term.RdfstarTriple: - collection_or_not = False quoted_Bnode_or_not = True if node in dictionary: - pass else: - subject = node.subject() predicate = node.predicate() object = node.object() - if subject in dictionary: subject = dictionary[subject] @@ -207,7 +196,6 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ if node not in dictionary: dictionary[node] = "<<" + " "+str(subject)+ " "+str(predicate) + " "+str(object) + " "+">>" - else: pass diff --git 
a/rdflib/plugins/serializers/turtlestar.py b/rdflib/plugins/serializers/turtlestar.py index 7a181c979..6dc922d41 100644 --- a/rdflib/plugins/serializers/turtlestar.py +++ b/rdflib/plugins/serializers/turtlestar.py @@ -63,6 +63,7 @@ def serialize( raise Exception( "Hextuple serialization can't (yet) handle formula-aware stores" ) + dictionary = {} blanknode_dictionary = {} result_subject = "" @@ -101,7 +102,6 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ if not [p,o] in blanknode_dictionary[node]: blanknode_dictionary[node].append([p,o]) - else: if ("http://www.w3.org/1999/02/22-rdf-syntax-ns#first" in p) or ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p): @@ -121,42 +121,30 @@ def update_dictionary_RdfstarTriple(node, g, dictionary, properties, collection_ if (not ("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" in p)and (not isinstance(o, rdflib.term.RdfstarTriple))): properties.append(")") - else: - if ((not isinstance(o, rdflib.term.BNode)) & (not isinstance(o, rdflib.term.RdfstarTriple)) & ((not isinstance(p, rdflib.term.BNode)) & (not isinstance(p, rdflib.term.RdfstarTriple)))): pass - else: collection_or_not = False quoted_Bnode_or_not = False - if (isinstance(p, rdflib.term.URIRef)): p = "<"+str(p)+">" elif isinstance(p, rdflib.term.Literal): p = p._literal_n3(use_plain=True) - - pass properties.append(p) if o in dictionary: properties.append(dictionary[o]) - else: - update_dictionary_RdfstarTriple(o, g, dictionary,properties, collection_or_not, quoted_Bnode_or_not, blanknode_dictionary) - if type(node) == rdflib.term.RdfstarTriple: collection_or_not = False quoted_Bnode_or_not = True if node in dictionary: - pass else: - - subject = node.subject() predicate = node.predicate() object = node.object() From 997aa56dcff947293ea1e7ec79eaec0534eeef04 Mon Sep 17 00:00:00 2001 From: Xuguang Song Date: Tue, 27 Sep 2022 08:48:18 +1000 Subject: [PATCH 11/11] remove some redundant codes in turtlestar and trigstar --- 
rdflib/plugins/parsers/ntriples-star.py | 6 +- rdflib/plugins/parsers/trigstar.py | 2059 +---------------------- rdflib/plugins/parsers/turtlestar.py | 1705 +------------------ 3 files changed, 50 insertions(+), 3720 deletions(-) diff --git a/rdflib/plugins/parsers/ntriples-star.py b/rdflib/plugins/parsers/ntriples-star.py index ec550cc2a..5899b8c0f 100644 --- a/rdflib/plugins/parsers/ntriples-star.py +++ b/rdflib/plugins/parsers/ntriples-star.py @@ -313,7 +313,6 @@ def runNamespace(): iri: IRIREF | prefixed_name prefixed_name: PNAME_LN | PNAME_NS blank_node: BLANK_NODE_LABEL | ANON - BASE_DIRECTIVE: "@base" IRIREF: "<" (/[^\x00-\x20<>"{}|^`\\]/ | UCHAR)* ">" PNAME_NS: PN_PREFIX? ":" @@ -342,7 +341,6 @@ def runNamespace(): PERCENT: "%" HEX~2 HEX: /[0-9A-Fa-f]/ PN_LOCAL_ESC: "\\" /[_~\.\-!$&'()*+,;=\/?#@%]/ - %ignore WS COMMENT: "#" /[^\n]/* %ignore COMMENT @@ -2351,6 +2349,10 @@ def parse( f.close() bp = rdbytes.decode("utf-8") + if "<<" or "{|" in bp: + ou = RDFstarParsings(bp) + else: + ou = bp ou = RDFstarParsings(bp) p.feed(ou) p.endDoc() diff --git a/rdflib/plugins/parsers/trigstar.py b/rdflib/plugins/parsers/trigstar.py index b9370b145..2fc92e66f 100644 --- a/rdflib/plugins/parsers/trigstar.py +++ b/rdflib/plugins/parsers/trigstar.py @@ -1,51 +1,13 @@ -#!/usr/bin/env python -""" -notation3.py - Standalone Notation3 Parser -Derived from CWM, the Closed World Machine -Authors of the original suite: -* Dan Connolly <@@> -* Tim Berners-Lee <@@> -* Yosi Scharf <@@> -* Joseph M. Reagle Jr. -* Rich Salz -http://www.w3.org/2000/10/swap/notation3.py -Copyright 2000-2007, World Wide Web Consortium. -Copyright 2001, MIT. -Copyright 2001, Zolera Systems Inc. -License: W3C Software License -http://www.w3.org/Consortium/Legal/copyright-software -Modified by Sean B. Palmer -Copyright 2007, Sean B. Palmer. -Modified to work with rdflib by Gunnar Aastrand Grimnes -Copyright 2010, Gunnar A. 
Grimnes -""" -import codecs -import os -import re + from smtplib import quotedata -import sys -import rdflib # importing typing for `typing.List` because `List`` is used for something else -import typing -from decimal import Decimal from typing import IO, TYPE_CHECKING, Any, Callable, Dict, Optional, TypeVar, Union from uuid import uuid4 - -from rdflib.compat import long_type +from .turtlestar import StarRDFSink, StarsinkParser +from .trig import becauseSubGraph from rdflib.exceptions import ParserError from rdflib.graph import ConjunctiveGraph, Graph, QuotedGraph -from rdflib.term import ( - _XSD_PFX, - RdfstarTriple, - BNode, - Identifier, - Literal, - Node, - URIRef, - Variable, - _unique_id, -) __all__ = [ "BadSyntax", @@ -66,191 +28,12 @@ AnyT = TypeVar("AnyT") - -def splitFragP(uriref, punct=0): - """split a URI reference before the fragment - Punctuation is kept. - e.g. - >>> splitFragP("abc#def") - ('abc', '#def') - >>> splitFragP("abcdef") - ('abcdef', '') - """ - - i = uriref.rfind("#") - if i >= 0: - return uriref[:i], uriref[i:] - else: - return uriref, "" - - -def join(here, there): - """join an absolute URI and URI reference - (non-ascii characters are supported/doctested; - haven't checked the details of the IRI spec though) - ``here`` is assumed to be absolute. - ``there`` is URI reference. - >>> join('http://example/x/y/z', '../abc') - 'http://example/x/abc' - Raise ValueError if there uses relative path - syntax but here has no hierarchical path. - >>> join('mid:foo@example', '../foo') # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - raise ValueError(here) - ValueError: Base has no slash - after colon - with relative '../foo'. 
- >>> join('http://example/x/y/z', '') - 'http://example/x/y/z' - >>> join('mid:foo@example', '#foo') - 'mid:foo@example#foo' - We grok IRIs - >>> len(u'Andr\\xe9') - 5 - >>> join('http://example.org/', u'#Andr\\xe9') - u'http://example.org/#Andr\\xe9' - """ - - # assert(here.find("#") < 0), \ - # "Base may not contain hash: '%s'" % here # why must caller splitFrag? - - slashl = there.find("/") - colonl = there.find(":") - - # join(base, 'foo:/') -- absolute - if colonl >= 0 and (slashl < 0 or colonl < slashl): - return there - - bcolonl = here.find(":") - assert bcolonl >= 0, ( - "Base uri '%s' is not absolute" % here - ) # else it's not absolute - - path, frag = splitFragP(there) - if not path: - return here + frag - - # join('mid:foo@example', '../foo') bzzt - if here[bcolonl + 1 : bcolonl + 2] != "/": - raise ValueError( - "Base <%s> has no slash after " - "colon - with relative '%s'." % (here, there) - ) - - if here[bcolonl + 1 : bcolonl + 3] == "//": - bpath = here.find("/", bcolonl + 3) - else: - bpath = bcolonl + 1 - - # join('http://xyz', 'foo') - if bpath < 0: - bpath = len(here) - here = here + "/" - - # join('http://xyz/', '//abc') => 'http://abc' - if there[:2] == "//": - return here[: bcolonl + 1] + there - - # join('http://xyz/', '/abc') => 'http://xyz/abc' - if there[:1] == "/": - return here[:bpath] + there - - slashr = here.rfind("/") - - while 1: - if path[:2] == "./": - path = path[2:] - if path == ".": - path = "" - elif path[:3] == "../" or path == "..": - path = path[3:] - i = here.rfind("/", bpath, slashr) - if i >= 0: - here = here[: i + 1] - slashr = i - else: - break - - return here[: slashr + 1] + path + frag - - -def base(): - """The base URI for this process - the Web equiv of cwd - Relative or absolute unix-standard filenames parsed relative to - this yield the URI of the file. 
- If we had a reliable way of getting a computer name, - we should put it in the hostname just to prevent ambiguity - """ - # return "file://" + hostname + os.getcwd() + "/" - return "file://" + _fixslash(os.getcwd()) + "/" - - -def _fixslash(s): - """Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" - s = s.replace("\\", "/") - if s[0] != "/" and s[1] == ":": - s = s[2:] # @@@ Hack when drive letter present - return s - - -CONTEXT = 0 -PRED = 1 -SUBJ = 2 -OBJ = 3 - -PARTS = PRED, SUBJ, OBJ -ALL4 = CONTEXT, PRED, SUBJ, OBJ - -SYMBOL = 0 -FORMULA = 1 -LITERAL = 2 -LITERAL_DT = 21 -LITERAL_LANG = 22 -ANONYMOUS = 3 -XMLLITERAL = 25 - -Logic_NS = "http://www.w3.org/2000/10/swap/log#" -NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging -forSomeSym = Logic_NS + "forSome" -forAllSym = Logic_NS + "forAll" - -RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" -RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" -OWL_NS = "http://www.w3.org/2002/07/owl#" -DAML_sameAs_URI = OWL_NS + "sameAs" -parsesTo_URI = Logic_NS + "parsesTo" -RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/" - -List_NS = RDF_NS_URI # From 20030808 -_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#" - -N3_first = (SYMBOL, List_NS + "first") -N3_rest = (SYMBOL, List_NS + "rest") -N3_li = (SYMBOL, List_NS + "li") -N3_nil = (SYMBOL, List_NS + "nil") -N3_List = (SYMBOL, List_NS + "List") -N3_Empty = (SYMBOL, List_NS + "Empty") - - -runNamespaceValue = None - - -def runNamespace(): - """Returns a URI suitable as a namespace for run-local objects""" - # @@@ include hostname (privacy?) (hash it?) 
- global runNamespaceValue - if runNamespaceValue is None: - runNamespaceValue = join(base(), _unique_id()) + "#" - return runNamespaceValue - - nextu = 0 from rdflib import ConjunctiveGraph from rdflib.parser import Parser from .notation3 import SinkParser, RDFSink -import re -import lark import hashlib from lark import ( Lark, @@ -264,24 +47,6 @@ def runNamespace(): Token, ) -# from pymantic.compat import ( -# binary_type, -# ) -# from pymantic.parsers.base import ( -# BaseParser, -# ) -# from pymantic.primitives import ( -# BlankNode, -# Literal, -# NamedNode, -# Triple, -# ) -# from pymantic.util import ( -# grouper, -# smart_urljoin, -# decode_literal, -# ) - from typing import List, Dict, Union, Callable, Iterable, Optional from lark import Lark @@ -427,7 +192,6 @@ def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str iri: IRIREF | prefixed_name prefixed_name: PNAME_LN | PNAME_NS blank_node: BLANK_NODE_LABEL | ANON - BASE_DIRECTIVE: "@base" IRIREF: "<" (/[^\x00-\x20<>"{}|^`\\]/ | UCHAR)* ">" PNAME_NS: PN_PREFIX? 
":" @@ -456,7 +220,6 @@ def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str PERCENT: "%" HEX~2 HEX: /[0-9A-Fa-f]/ PN_LOCAL_ESC: "\\" /[_~\.\-!$&'()*+,;=\/?#@%]/ - %ignore WS COMMENT: "#" /[^\n]/* %ignore COMMENT @@ -532,27 +295,32 @@ def triples(self, var): po_list = [] to_remove.append(tri) def compoundanno(self, var): + appends1 = [] tri2 = Reconstructorv2(trig_lark).reconstruct(var) - for x in var.children[1].children: + test = Reconstructorv2(trig_lark).reconstruct(x) if "{|" in test: test123 = test.split("{|",1) + object = test123[0] + test123.pop(0) + test_annotation = "{|"+ "".join(test123) result = annotation_dict[test_annotation] - if not tri2 in annotation_dict: - annotation_dict[tri2] = [object,result] - else: - annotation_dict[tri2].append(object) - annotation_dict[tri2].append(result) + + appends1.append(object) + appends1.append(result) else: - if not tri2 in annotation_dict: - annotation_dict[tri2] = [test] - else: - annotation_dict[tri2].append(test) + appends1.append(test) + + if not tri2 in annotation_dict: + annotation_dict[tri2] = appends1 + elif not appends1 == annotation_dict[tri2]: + for x in appends1: + annotation_dict[tri2].append(x) class FindVariables(Visitor): def __init__(self): @@ -863,1774 +631,7 @@ def expand_to_rdfstar(x): constructors = bytes(constructors, 'utf-8') return constructors -def becauseSubGraph(*args, **kwargs): - pass - - -def uniqueURI(): - """A unique URI""" - global nextu - nextu += 1 - return runNamespace() + "u_" + str(nextu) - -tracking = False -chatty_flag = 50 - -# from why import BecauseOfData, becauseSubexpression - -def BecauseOfData(*args, **kargs): - # print args, kargs - pass - - -def becauseSubexpression(*args, **kargs): - # print args, kargs - pass - - -N3_forSome_URI = forSomeSym -N3_forAll_URI = forAllSym - -# Magic resources we know about - -ADDED_HASH = "#" # Stop where we use this in case we want to remove it! 
-# This is the hash on namespace URIs - -RDF_type = (SYMBOL, RDF_type_URI) -DAML_sameAs = (SYMBOL, DAML_sameAs_URI) - -LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies" - -BOOLEAN_DATATYPE = _XSD_PFX + "boolean" -DECIMAL_DATATYPE = _XSD_PFX + "decimal" -DOUBLE_DATATYPE = _XSD_PFX + "double" -FLOAT_DATATYPE = _XSD_PFX + "float" -INTEGER_DATATYPE = _XSD_PFX + "integer" - -option_noregen = 0 # If set, do not regenerate genids on output - -# @@ I18n - the notname chars need extending for well known unicode non-text -# characters. The XML spec switched to assuming unknown things were name -# characters. -# _namechars = string.lowercase + string.uppercase + string.digits + '_-' -_notQNameChars = set("\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~") # else valid qname :-/ -_notKeywordsChars = _notQNameChars | {"."} -_notNameChars = _notQNameChars | {":"} # Assume anything else valid name :-/ -_rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" - -hexChars = set("ABCDEFabcdef0123456789") -escapeChars = set("(_~.-!$&'()*+,;=/?#@%)") # valid for \ escapes in localnames -numberChars = set("0123456789-") -numberCharsPlus = numberChars | {"+", "."} - - -def unicodeExpand(m): - try: - return chr(int(m.group(1), 16)) - except: - raise Exception("Invalid unicode code point: " + m.group(1)) - - -unicodeEscape4 = re.compile(r"\\u([0-9a-fA-F]{4})") -unicodeEscape8 = re.compile(r"\\U([0-9a-fA-F]{8})") - - -N3CommentCharacter = "#" # For unix script # ! compatibility - -# Parse string to sink -# -# Regular expressions: -eol = re.compile(r"[ \t]*(#[^\n]*)?\r?\n") # end of line, poss. w/comment -eof = re.compile(r"[ \t]*(#[^\n]*)?$") # end of file, poss. 
w/comment -ws = re.compile(r"[ \t]*") # Whitespace not including NL -signed_integer = re.compile(r"[-+]?[0-9]+") # integer -integer_syntax = re.compile(r"[-+]?[0-9]+") -decimal_syntax = re.compile(r"[-+]?[0-9]*\.[0-9]+") -exponent_syntax = re.compile( - r"[-+]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)(?:e|E)[-+]?[0-9]+" -) -digitstring = re.compile(r"[0-9]+") # Unsigned integer -interesting = re.compile(r"""[\\\r\n\"\']""") -langcode = re.compile(r"[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*") - -quoted_triple_list = [] -class SinkParser: - def __init__( - self, - store: "RDFSink", - openFormula: Optional["Formula"] = None, - thisDoc: str = "", - baseURI: Optional[str] = None, - genPrefix: str = "", - why: Optional[Callable[[], None]] = None, - turtle: bool = False, - ): - """note: namespace names should *not* end in # ; - the # will get added during qname processing""" - - self._bindings = {} - if thisDoc != "": - assert ":" in thisDoc, "Document URI not absolute: <%s>" % thisDoc - self._bindings[""] = thisDoc + "#" # default - - self._store = store - if genPrefix: - # TODO FIXME: there is no function named setGenPrefix - store.setGenPrefix(genPrefix) # type: ignore[attr-defined] # pass it on - - self._thisDoc = thisDoc - self.lines = 0 # for error handling - self.startOfLine = 0 # For calculating character number - self._genPrefix = genPrefix - self.keywords = ["a", "this", "bind", "has", "is", "of", "true", "false"] - self.keywordsSet = 0 # Then only can others be considered qnames - self._anonymousNodes: Dict[str, Node] = {} - self._rdfstartripleNodes: Dict[str, Node] = {} - # Dict of anon nodes already declared ln: Term - self._variables: Dict[Identifier, Identifier] = {} - self._parentVariables: Dict[Identifier, Identifier] = {} - self._reason = why # Why the parser was asked to parse this - - self.turtle = turtle # raise exception when encountering N3 extensions - # Turtle allows single or double quotes around strings, whereas N3 - # only allows double quotes. 
- self.string_delimiters = ('"', "'") if turtle else ('"',) - - self._reason2 = None # Why these triples - # was: diag.tracking - if tracking: - self._reason2 = BecauseOfData( - store.newSymbol(thisDoc), because=self._reason - ) - - self._baseURI: Optional[str] - if baseURI: - self._baseURI = baseURI - else: - if thisDoc: - self._baseURI = thisDoc - else: - self._baseURI = None - - assert not self._baseURI or ":" in self._baseURI - - if not self._genPrefix: - if self._thisDoc: - self._genPrefix = self._thisDoc + "#_g" - else: - self._genPrefix = uniqueURI() - - self._formula: Formula - if openFormula is None and not turtle: - if self._thisDoc: - # TODO FIXME: store.newFormula does not take any arguments - self._formula = store.newFormula(thisDoc + "#_formula") # type: ignore[call-arg] - else: - self._formula = store.newFormula() - else: - self._formula = openFormula # type: ignore[assignment] - - self._context = self._formula - self._parentContext: Optional[Formula] = None - - def here(self, i: int) -> str: - """String generated from position in file - This is for repeatability when referring people to bnodes in a document. - This has diagnostic uses less formally, as it should point one to which - bnode the arbitrary identifier actually is. It gives the - line and character number of the '[' charcacter or path character - which introduced the blank node. The first blank node is boringly - _L1C1. 
It used to be used only for tracking, but for tests in general - it makes the canonical ordering of bnodes repeatable.""" - - return "%s_L%iC%i" % (self._genPrefix, self.lines, i - self.startOfLine + 1) - - def formula(self): - return self._formula - - def loadStream(self, stream: Union[IO[str], IO[bytes]]) -> Optional["Formula"]: - return self.loadBuf(stream.read()) # Not ideal - - def loadBuf(self, buf: Union[str, bytes]): - """Parses a buffer and returns its top level formula""" - self.startDoc() - - self.feed(buf) - return self.endDoc() # self._formula - - def feed(self, octets: Union[str, bytes]): - """Feed an octet stream to the parser - if BadSyntax is raised, the string - passed in the exception object is the - remainder after any statements have been parsed. - So if there is more data to feed to the - parser, it should be straightforward to recover.""" - - if not isinstance(octets, str): - s = octets.decode("utf-8") - # NB already decoded, so \ufeff - if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode("utf-8"): - s = s[1:] - else: - s = octets - - i = 0 - while i >= 0: - j = self.skipSpace(s, i) - if j < 0: - return - i = self.directiveOrStatement(s, j) - if i < 0: - # print("# next char: %s" % s[j-5:j+5]) - # print("asdadasd", i, j) - self.BadSyntax(s, j, "expected directive or statement") - - def directiveOrStatement(self, argstr: str, h: int) -> int: - - i = self.skipSpace(argstr, h) - if i < 0: - return i # EOF - - if self.turtle: - j = self.sparqlDirective(argstr, i) - if j >= 0: - return j - - j = self.directive(argstr, i) - if j >= 0: - return self.checkDot(argstr, j) - j = self.statement(argstr, i) - if j >= 0: - return self.checkDot(argstr, j) - - return j - - # @@I18N - # _namechars = string.lowercase + string.uppercase + string.digits + '_-' - - def tok(self, tok: str, argstr: str, i: int, colon: bool = False): - """Check for keyword. Space must have been stripped on entry and - we must not be at end of file. 
- if colon, then keyword followed by colon is ok - (@prefix: is ok, rdf:type shortcut a must be followed by ws) - """ - - assert tok[0] not in _notNameChars # not for punctuation - if argstr[i] == "@": - i += 1 - else: - if tok not in self.keywords: - return -1 # No, this has neither keywords declaration nor "@" - - i_plus_len_tok = i + len(tok) - if ( - argstr[i:i_plus_len_tok] == tok - and (argstr[i_plus_len_tok] in _notKeywordsChars) - or (colon and argstr[i_plus_len_tok] == ":") - ): - return i_plus_len_tok - else: - return -1 - - def sparqlTok(self, tok: str, argstr: str, i: int) -> int: - """Check for SPARQL keyword. Space must have been stripped on entry - and we must not be at end of file. - Case insensitive and not preceded by @ - """ - - assert tok[0] not in _notNameChars # not for punctuation - - len_tok = len(tok) - if argstr[i : i + len_tok].lower() == tok.lower() and ( - argstr[i + len_tok] in _notQNameChars - ): - i += len_tok - return i - else: - return -1 - - def directive(self, argstr: str, i: int) -> int: - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - res: typing.List[Any] = [] - - j = self.tok("bind", argstr, i) # implied "#". Obsolete. 
- if j > 0: - self.BadSyntax(argstr, i, "keyword bind is obsolete: use @prefix") - - j = self.tok("keywords", argstr, i) - if j > 0: - if self.turtle: - self.BadSyntax(argstr, i, "Found 'keywords' when in Turtle mode.") - - i = self.commaSeparatedList(argstr, j, res, self.bareWord) - if i < 0: - self.BadSyntax( - argstr, i, "'@keywords' needs comma separated list of words" - ) - self.setKeywords(res[:]) - return i - - j = self.tok("forAll", argstr, i) - if j > 0: - if self.turtle: - self.BadSyntax(argstr, i, "Found 'forAll' when in Turtle mode.") - - i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) - if i < 0: - self.BadSyntax(argstr, i, "Bad variable list after @forAll") - for x in res: - # self._context.declareUniversal(x) - if x not in self._variables or x in self._parentVariables: - self._variables[x] = self._context.newUniversal(x) - return i - - j = self.tok("forSome", argstr, i) - if j > 0: - if self.turtle: - self.BadSyntax(argstr, i, "Found 'forSome' when in Turtle mode.") - - i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) - if i < 0: - self.BadSyntax(argstr, i, "Bad variable list after @forSome") - for x in res: - self._context.declareExistential(x) - return i - - j = self.tok("prefix", argstr, i, colon=True) # no implied "#" - if j >= 0: - t: typing.List[Any] = [] - i = self.qname(argstr, j, t) - if i < 0: - self.BadSyntax(argstr, j, "expected qname after @prefix") - j = self.uri_ref2(argstr, i, t) - if j < 0: - self.BadSyntax(argstr, i, "expected after @prefix _qname_") - ns = self.uriOf(t[1]) - - if self._baseURI: - ns = join(self._baseURI, ns) - elif ":" not in ns: - self.BadSyntax( - argstr, - j, - f"With no base URI, cannot use relative URI in @prefix <{ns}>", - ) - assert ":" in ns # must be absolute - self._bindings[t[0][0]] = ns - self.bind(t[0][0], hexify(ns)) - return j - - j = self.tok("base", argstr, i) # Added 2007/7/7 - if j >= 0: - t = [] - i = self.uri_ref2(argstr, j, t) - if i < 0: - self.BadSyntax(argstr, j, 
"expected after @base ") - ns = self.uriOf(t[0]) - - if self._baseURI: - ns = join(self._baseURI, ns) - else: - self.BadSyntax( - argstr, - j, - "With no previous base URI, cannot use " - + "relative URI in @base <" - + ns - + ">", - ) - assert ":" in ns # must be absolute - self._baseURI = ns - return i - - return -1 # Not a directive, could be something else. - - def sparqlDirective(self, argstr: str, i: int): - - """ - turtle and trig support BASE/PREFIX without @ and without - terminating . - """ - - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - - j = self.sparqlTok("PREFIX", argstr, i) - if j >= 0: - t: typing.List[Any] = [] - i = self.qname(argstr, j, t) - if i < 0: - self.BadSyntax(argstr, j, "expected qname after @prefix") - j = self.uri_ref2(argstr, i, t) - if j < 0: - self.BadSyntax(argstr, i, "expected after @prefix _qname_") - ns = self.uriOf(t[1]) - - if self._baseURI: - ns = join(self._baseURI, ns) - elif ":" not in ns: - self.BadSyntax( - argstr, - j, - "With no base URI, cannot use " - + "relative URI in @prefix <" - + ns - + ">", - ) - assert ":" in ns # must be absolute - self._bindings[t[0][0]] = ns - self.bind(t[0][0], hexify(ns)) - return j - - j = self.sparqlTok("BASE", argstr, i) - if j >= 0: - t = [] - i = self.uri_ref2(argstr, j, t) - if i < 0: - self.BadSyntax(argstr, j, "expected after @base ") - ns = self.uriOf(t[0]) - - if self._baseURI: - ns = join(self._baseURI, ns) - else: - self.BadSyntax( - argstr, - j, - "With no previous base URI, cannot use " - + "relative URI in @base <" - + ns - + ">", - ) - assert ":" in ns # must be absolute - self._baseURI = ns - return i - - return -1 # Not a directive, could be something else. 
- - def bind(self, qn: str, uri: bytes) -> None: - assert isinstance(uri, bytes), "Any unicode must be %x-encoded already" - if qn == "": - self._store.setDefaultNamespace(uri) - else: - self._store.bind(qn, uri) - - def setKeywords(self, k: Optional[typing.List[str]]): - """Takes a list of strings""" - if k is None: - self.keywordsSet = 0 - else: - self.keywords = k - self.keywordsSet = 1 - - def startDoc(self) -> None: - # was: self._store.startDoc() - self._store.startDoc(self._formula) - - def endDoc(self) -> Optional["Formula"]: - """Signal end of document and stop parsing. returns formula""" - self._store.endDoc(self._formula) # don't canonicalize yet - return self._formula - - def makeStatement(self, quadruple): - # $$$$$$$$$$$$$$$$$$$$$ - # print "# Parser output: ", `quadruple` - self._store.makeStatement(quadruple, why=self._reason2) - - def makerdfstarStatement(self, quadruple): - # $$$$$$$$$$$$$$$$$$$$$ - # print "# Parser output: ", `quadruple` - self._store.makerdfstarStatement(quadruple, why=self._reason2) - - def statement(self, argstr: str, i: int) -> int: - r: typing.List[Any] = [] - i = self.object(argstr, i, r) # Allow literal for subject - extends RDF - if i < 0: - return i - - j = self.property_list(argstr, i, r[0]) - - if j < 0: - self.BadSyntax(argstr, i, "expected propertylist") - return j - - def subject(self, argstr: str, i: int, res: typing.List[Any]) -> int: - return self.item(argstr, i, res) - - def verb(self, argstr: str, i: int, res: typing.List[Any]) -> int: - """has _prop_ - is _prop_ of - a - = - _prop_ - >- prop -> - <- prop -< - _operator_""" - - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - - r: typing.List[Any] = [] - - j = self.tok("has", argstr, i) - if j >= 0: - if self.turtle: - self.BadSyntax(argstr, i, "Found 'has' keyword in Turtle mode") - - i = self.prop(argstr, j, r) - if i < 0: - self.BadSyntax(argstr, j, "expected property after 'has'") - res.append(("->", r[0])) - return i - - j = self.tok("is", 
argstr, i) - if j >= 0: - if self.turtle: - self.BadSyntax(argstr, i, "Found 'is' keyword in Turtle mode") - - i = self.prop(argstr, j, r) - if i < 0: - self.BadSyntax(argstr, j, "expected after 'is'") - j = self.skipSpace(argstr, i) - if j < 0: - self.BadSyntax( - argstr, i, "End of file found, expected property after 'is'" - ) - i = j - j = self.tok("of", argstr, i) - if j < 0: - self.BadSyntax(argstr, i, "expected 'of' after 'is' ") - res.append(("<-", r[0])) - return j - - j = self.tok("a", argstr, i) - if j >= 0: - res.append(("->", RDF_type)) - return j - - if argstr[i : i + 2] == "<=": - if self.turtle: - self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. ") - - res.append(("<-", self._store.newSymbol(Logic_NS + "implies"))) - return i + 2 - - if argstr[i] == "=": - if self.turtle: - self.BadSyntax(argstr, i, "Found '=' in Turtle mode") - if argstr[i + 1] == ">": - res.append(("->", self._store.newSymbol(Logic_NS + "implies"))) - return i + 2 - res.append(("->", DAML_sameAs)) - return i + 1 - - if argstr[i : i + 2] == ":=": - if self.turtle: - self.BadSyntax(argstr, i, "Found ':=' in Turtle mode") - - # patch file relates two formulae, uses this @@ really? - res.append(("->", Logic_NS + "becomes")) - return i + 2 - - j = self.prop(argstr, i, r) - if j >= 0: - res.append(("->", r[0])) - return j - - if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-": - self.BadSyntax(argstr, j, ">- ... -> syntax is obsolete.") - - return -1 - - def prop(self, argstr: str, i: int, res): - return self.item(argstr, i, res) - - def item(self, argstr: str, i, res): - return self.path(argstr, i, res) - - def blankNode(self, uri=None): - return self._store.newBlankNode(self._context, uri, why=self._reason2) - - def path(self, argstr: str, i: int, res): - """Parse the path production.""" - j = self.nodeOrLiteral(argstr, i, res) - if j < 0: - return j # nope - - while argstr[j] in {"!", "^"}: # no spaces, must follow exactly (?) 
- ch = argstr[j] - subj = res.pop() - obj = self.blankNode(uri=self.here(j)) - j = self.node(argstr, j + 1, res) - if j < 0: - self.BadSyntax(argstr, j, "EOF found in middle of path syntax") - pred = res.pop() - if ch == "^": # Reverse traverse - self.makeStatement((self._context, pred, obj, subj)) - else: - self.makeStatement((self._context, pred, subj, obj)) - res.append(obj) - return j - - def anonymousNode(self, ln: str): - """Remember or generate a term for one of these _: anonymous nodes""" - # print("anonymousNode", self._anonymousNodes.get(ln, None), self._context, self._reason2) - if ("RdfstarTriple" in ln): - # print("new object") - # ln = ln.replace("RdfstarTriple", "") - term = self._rdfstartripleNodes.get(ln, None) - if term is not None: - return term - term = self._store.newRdfstarTriple(self._context, why=self._reason2, hashvalue = ln) - self._rdfstartripleNodes[ln] = term - return term - term = self._anonymousNodes.get(ln, None) - if term is not None: - return term - term = self._store.newBlankNode(self._context, why=self._reason2) - self._anonymousNodes[ln] = term - return term - - def node(self, argstr: str, i: int, res, subjectAlready=None): - """Parse the production. - Space is now skipped once at the beginning - instead of in multiple calls to self.skipSpace(). - """ - subj = subjectAlready - - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - i = j - ch = argstr[i] # Quick 1-character checks first: - - if ch == "[": - bnodeID = self.here(i) - j = self.skipSpace(argstr, i + 1) - if j < 0: - self.BadSyntax(argstr, i, "EOF after '['") - # Hack for "is" binding name to anon node - if argstr[j] == "=": - if self.turtle: - self.BadSyntax( - argstr, j, "Found '[=' or '[ =' when in turtle mode." 
- ) - i = j + 1 - objs: typing.List[Any] = [] - j = self.objectList(argstr, i, objs) - if j >= 0: - subj = objs[0] - if len(objs) > 1: - for obj in objs: - self.makeStatement((self._context, DAML_sameAs, subj, obj)) - j = self.skipSpace(argstr, j) - if j < 0: - self.BadSyntax( - argstr, i, "EOF when objectList expected after [ = " - ) - if argstr[j] == ";": - j += 1 - else: - self.BadSyntax(argstr, i, "objectList expected after [= ") - - if subj is None: - subj = self.blankNode(uri=bnodeID) - i = self.property_list(argstr, j, subj) - if i < 0: - self.BadSyntax(argstr, j, "property_list expected") - - j = self.skipSpace(argstr, i) - if j < 0: - self.BadSyntax( - argstr, i, "EOF when ']' expected after [ " - ) - if argstr[j] != "]": - # print("asdadasd", argstr[j-1], argstr[j-2], argstr[j-3], argstr[j-4], argstr[j-5]) - self.BadSyntax(argstr, j, "']' expected") - res.append(subj) - return j + 1 - - if not self.turtle and ch == "{": - # if self.turtle: - # self.BadSyntax(argstr, i, - # "found '{' while in Turtle mode, Formulas not supported!") - ch2 = argstr[i + 1] - if ch2 == "$": - # a set - i += 1 - j = i + 1 - List = [] - first_run = True - while 1: - i = self.skipSpace(argstr, j) - if i < 0: - self.BadSyntax(argstr, i, "needed '$}', found end.") - if argstr[i : i + 2] == "$}": - j = i + 2 - break - - if not first_run: - if argstr[i] == ",": - i += 1 - else: - self.BadSyntax(argstr, i, "expected: ','") - else: - first_run = False - - item: typing.List[Any] = [] - j = self.item(argstr, i, item) # @@@@@ should be path, was object - if j < 0: - self.BadSyntax(argstr, i, "expected item in set or '$}'") - List.append(self._store.intern(item[0])) - res.append(self._store.newSet(List, self._context)) - return j - else: - # parse a formula - j = i + 1 - oldParentContext = self._parentContext - self._parentContext = self._context - parentAnonymousNodes = self._anonymousNodes - grandParentVariables = self._parentVariables - self._parentVariables = self._variables - 
self._anonymousNodes = {} - self._variables = self._variables.copy() - reason2 = self._reason2 - self._reason2 = becauseSubexpression - if subj is None: - subj = self._store.newFormula() - self._context = subj - - while 1: - i = self.skipSpace(argstr, j) - if i < 0: - self.BadSyntax(argstr, i, "needed '}', found end.") - - if argstr[i] == "}": - j = i + 1 - break - j = self.directiveOrStatement(argstr, i) - if j < 0: - self.BadSyntax(argstr, i, "expected statement or '}'") - - self._anonymousNodes = parentAnonymousNodes - self._variables = self._parentVariables - self._parentVariables = grandParentVariables - self._context = self._parentContext - self._reason2 = reason2 - self._parentContext = oldParentContext - res.append(subj.close()) # No use until closed - return j - - if ch == "(": - thing_type = self._store.newList - ch2 = argstr[i + 1] - if ch2 == "$": - thing_type = self._store.newSet - i += 1 - j = i + 1 - - List = [] - while 1: - i = self.skipSpace(argstr, j) - if i < 0: - self.BadSyntax(argstr, i, "needed ')', found end.") - if argstr[i] == ")": - j = i + 1 - break - - item = [] - j = self.item(argstr, i, item) # @@@@@ should be path, was object - if j < 0: - self.BadSyntax(argstr, i, "expected item in list or ')'") - List.append(self._store.intern(item[0])) - res.append(thing_type(List, self._context)) - return j - - j = self.tok("this", argstr, i) # This context - if j >= 0: - self.BadSyntax( - argstr, - i, - "Keyword 'this' was ancient N3. Now use " - + "@forSome and @forAll keywords.", - ) - - # booleans - j = self.tok("true", argstr, i) - if j >= 0: - res.append(True) - return j - j = self.tok("false", argstr, i) - if j >= 0: - res.append(False) - return j - - if subj is None: # If this can be a named node, then check for a name. 
- j = self.uri_ref2(argstr, i, res) - if j >= 0: - return j - - return -1 - - def addingquotedRdfstarTriple(self, quoted_triple_list, dira): - if quoted_triple_list[0] == rdflib.term.URIRef('https://w3id.org/rdf-star/AssertedStatement'): - if quoted_triple_list[1] == rdflib.term.URIRef('https://w3id.org/rdf-star/QuotedStatement'): - if dira == "->": - self.makeStatement((self._context, quoted_triple_list[4], quoted_triple_list[3], quoted_triple_list[5])) - quoted_triple_list[2].setSubject(quoted_triple_list[3]) - quoted_triple_list[2].setPredicate(quoted_triple_list[4]) - quoted_triple_list[2].setObject(quoted_triple_list[5]) - - else: - self.makeStatement((self._context, quoted_triple_list[4], quoted_triple_list[5], quoted_triple_list[3])) - quoted_triple_list[2].setSubject(quoted_triple_list[4]) - quoted_triple_list[2].setPredicate(quoted_triple_list[5]) - quoted_triple_list[2].setObject(quoted_triple_list[6]) - - else: - if dira == "->": - self.makeStatement((self._context, quoted_triple_list[2], quoted_triple_list[1], quoted_triple_list[3])) - else: - self.makeStatement((self._context, quoted_triple_list[2], quoted_triple_list[3], quoted_triple_list[1])) - else: - if dira == "->": - quoted_triple_list[1].setSubject(quoted_triple_list[2]) - quoted_triple_list[1].setPredicate(quoted_triple_list[3]) - quoted_triple_list[1].setObject(quoted_triple_list[4]) - - else: - quoted_triple_list[1].setSubject(quoted_triple_list[2]) - quoted_triple_list[1].setPredicate(quoted_triple_list[3]) - quoted_triple_list[1].setObject(quoted_triple_list[4]) - - def property_list(self, argstr: str, i: int, subj): - """Parse property list - Leaves the terminating punctuation in the buffer - """ - global quoted_triple_list - while 1: - while 1: # skip repeat ; - j = self.skipSpace(argstr, i) - if j < 0: - self.BadSyntax( - argstr, i, "EOF found when expected verb in property list" - ) - if argstr[j] != ";": - break - i = j + 1 - - if argstr[j : j + 2] == ":-": - if self.turtle: - 
self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") - i = j + 2 - res: typing.List[Any] = [] - # print("node in propertylist", self.node(argstr, i, res, subj)) - j = self.node(argstr, i, res, subj) - if j < 0: - self.BadSyntax(argstr, i, "bad {} or () or [] node after :- ") - i = j - continue - i = j - v: typing.List[Any] = [] - j = self.verb(argstr, i, v) - if j <= 0: - return i # void but valid - - objs: typing.List[Any] = [] - - i = self.objectList(argstr, j, objs) - # print("objectList in propertylist", objs) - if i < 0: - self.BadSyntax(argstr, j, "objectList expected") - - for obj in objs: - dira, sym = v[0] - if "RdfstarTriple" in subj: - # print("asdasdasd", obj) - if "rdf-star" in str(obj): - if len(quoted_triple_list) > 2: - quoted_triple_list = [] - quoted_triple_list.append(obj) - if (rdflib.term.URIRef('https://w3id.org/rdf-star/QuotedStatement') in quoted_triple_list) & (not (subj in quoted_triple_list)): - quoted_triple_list.append(subj) - if "#object" in sym: - # print("asdasdasd", quoted_triple_list) - self.addingquotedRdfstarTriple(quoted_triple_list, dira) - else: - if dira == "->": - # print("tests ->", self._context, sym, subj, obj) - self.makeStatement((self._context, sym, subj, obj)) - else: - self.makeStatement((self._context, sym, obj, subj)) - - j = self.skipSpace(argstr, i) - if j < 0: - self.BadSyntax(argstr, j, "EOF found in list of objects") - if argstr[i] != ";": - return i - i += 1 # skip semicolon and continue - - def commaSeparatedList(self, argstr: str, j, res, what): - """return value: -1 bad syntax; >1 new position in argstr - res has things found appended - """ - i = self.skipSpace(argstr, j) - if i < 0: - self.BadSyntax(argstr, i, "EOF found expecting comma sep list") - if argstr[i] == ".": - return j # empty list is OK - i = what(argstr, i, res) - if i < 0: - return -1 - - while 1: - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - ch = argstr[j] - if ch != ",": - if ch != ".": - return -1 - return j # Found 
but not swallowed "." - i = what(argstr, j + 1, res) - if i < 0: - self.BadSyntax(argstr, i, "bad list content") - - def objectList(self, argstr: str, i: int, res: typing.List[Any]) -> int: - # print("object in objectList") - i = self.object(argstr, i, res) - if i < 0: - return -1 - while 1: - j = self.skipSpace(argstr, i) - if j < 0: - self.BadSyntax(argstr, j, "EOF found after object") - if argstr[j] != ",": - return j # Found something else! - i = self.object(argstr, j + 1, res) - if i < 0: - return i - - def checkDot(self, argstr: str, i: int): - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - ch = argstr[j] - if ch == ".": - return j + 1 # skip - if ch == "}": - return j # don't skip it - if ch == "]": - return j - self.BadSyntax(argstr, j, "expected '.' or '}' or ']' at end of statement") - - def uri_ref2(self, argstr: str, i: int, res): - """Generate uri from n3 representation. - Note that the RDF convention of directly concatenating - NS and local name is now used though I prefer inserting a '#' - to make the namesapces look more like what XML folks expect. - """ - qn: typing.List[Any] = [] - j = self.qname(argstr, i, qn) - if j >= 0: - pfx, ln = qn[0] - if pfx is None: - assert 0, "not used?" - ns = self._baseURI + ADDED_HASH # type: ignore[unreachable] - else: - try: - ns = self._bindings[pfx] - except KeyError: - # print("testuri2", pfx, ln) - if pfx == "_": # Magic prefix 2001/05/30, can be changed - # print("anonymousNode in uriref2") - res.append(self.anonymousNode(ln)) - return j - if not self.turtle and pfx == "": - ns = join(self._baseURI or "", "#") - else: - self.BadSyntax(argstr, i, 'Prefix "%s:" not bound' % (pfx)) - symb = self._store.newSymbol(ns + ln) - res.append(self._variables.get(symb, symb)) - return j - - i = self.skipSpace(argstr, i) - if i < 0: - return -1 - - if argstr[i] == "?": - v: typing.List[Any] = [] - j = self.variable(argstr, i, v) - if j > 0: # Forget variables as a class, only in context. 
- res.append(v[0]) - return j - return -1 - - elif argstr[i] == "<": - st = i + 1 - i = argstr.find(">", st) - if i >= 0: - uref = argstr[st:i] # the join should dealt with "": - - # expand unicode escapes - uref = unicodeEscape8.sub(unicodeExpand, uref) - uref = unicodeEscape4.sub(unicodeExpand, uref) - - if self._baseURI: - uref = join(self._baseURI, uref) # was: uripath.join - else: - assert ( - ":" in uref - ), "With no base URI, cannot deal with relative URIs" - if argstr[i - 1] == "#" and not uref[-1:] == "#": - uref += "#" # She meant it! Weirdness in urlparse? - symb = self._store.newSymbol(uref) - res.append(self._variables.get(symb, symb)) - return i + 1 - self.BadSyntax(argstr, j, "unterminated URI reference") - - elif self.keywordsSet: - v = [] - j = self.bareWord(argstr, i, v) - if j < 0: - return -1 # Forget variables as a class, only in context. - if v[0] in self.keywords: - self.BadSyntax(argstr, i, 'Keyword "%s" not allowed here.' % v[0]) - res.append(self._store.newSymbol(self._bindings[""] + v[0])) - return j - else: - return -1 - - def skipSpace(self, argstr: str, i: int): - """Skip white space, newlines and comments. - return -1 if EOF, else position of first non-ws character""" - - # Most common case is a non-commented line starting with few spaces and tabs. 
- try: - while True: - ch = argstr[i] - if ch in {" ", "\t"}: - i += 1 - continue - elif ch not in {"#", "\r", "\n"}: - return i - break - except IndexError: - return -1 - - while 1: - m = eol.match(argstr, i) - if m is None: - break - self.lines += 1 - self.startOfLine = i = m.end() # Point to first character unmatched - m = ws.match(argstr, i) - if m is not None: - i = m.end() - m = eof.match(argstr, i) - return i if m is None else -1 - - def variable(self, argstr: str, i: int, res): - """?abc -> variable(:abc)""" - - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - - if argstr[j] != "?": - return -1 - j += 1 - i = j - if argstr[j] in numberChars: - self.BadSyntax(argstr, j, "Variable name can't start with '%s'" % argstr[j]) - len_argstr = len(argstr) - while i < len_argstr and argstr[i] not in _notKeywordsChars: - i += 1 - if self._parentContext is None: - varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) # type: ignore[operator] - if varURI not in self._variables: - self._variables[varURI] = self._context.newUniversal( - varURI, why=self._reason2 - ) - res.append(self._variables[varURI]) - return i - # @@ was: - # self.BadSyntax(argstr, j, - # "Can't use ?xxx syntax for variable in outermost level: %s" - # % argstr[j-1:i]) - varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) # type: ignore[operator] - if varURI not in self._parentVariables: - self._parentVariables[varURI] = self._parentContext.newUniversal( - varURI, why=self._reason2 - ) - res.append(self._parentVariables[varURI]) - return i - - def bareWord(self, argstr: str, i: int, res): - """abc -> :abc""" - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - - if argstr[j] in numberChars or argstr[j] in _notKeywordsChars: - return -1 - i = j - len_argstr = len(argstr) - while i < len_argstr and argstr[i] not in _notKeywordsChars: - i += 1 - res.append(argstr[j:i]) - return i - - def qname(self, argstr: str, i: int, res): - """ - xyz:def -> ('xyz', 'def') - If not 
in keywords and keywordsSet: def -> ('', 'def') - :def -> ('', 'def') - """ - - i = self.skipSpace(argstr, i) - if i < 0: - return -1 - - c = argstr[i] - if c in numberCharsPlus: - return -1 - len_argstr = len(argstr) - if c not in _notNameChars: - j = i - i += 1 - - try: - while argstr[i] not in _notNameChars: - i += 1 - except IndexError: - pass # Very rare. - - if argstr[i - 1] == ".": # qname cannot end with "." - i -= 1 - if i == j: - return -1 - ln = argstr[j:i] - - else: # First character is non-alpha - ln = "" # Was: None - TBL (why? useful?) - - if i < len_argstr and argstr[i] == ":": - pfx = ln - # bnodes names have different rules - if pfx == "_": - allowedChars = _notNameChars - else: - allowedChars = _notQNameChars - - i += 1 - lastslash = False - start = i - ln = "" - while i < len_argstr: - c = argstr[i] - if c == "\\" and not lastslash: # Very rare. - lastslash = True - if start < i: - ln += argstr[start:i] - start = i + 1 - elif c not in allowedChars or lastslash: # Most common case is "a-zA-Z" - if lastslash: - if c not in escapeChars: - raise BadSyntax( - self._thisDoc, - self.lines, - argstr, - i, - "illegal escape " + c, - ) - elif c == "%": # Very rare. - if ( - argstr[i + 1] not in hexChars - or argstr[i + 2] not in hexChars - ): - raise BadSyntax( - self._thisDoc, - self.lines, - argstr, - i, - "illegal hex escape " + c, - ) - lastslash = False - else: - break - i += 1 - - if lastslash: - raise BadSyntax( - self._thisDoc, self.lines, argstr, i, "qname cannot end with \\" - ) - - if argstr[i - 1] == ".": - # localname cannot end in . 
- if len(ln) == 0 and start == i: - return -1 - i -= 1 - - if start < i: - ln += argstr[start:i] - - res.append((pfx, ln)) - return i - - else: # delimiter was not ":" - if ln and self.keywordsSet and ln not in self.keywords: - res.append(("", ln)) - return i - return -1 - - def object(self, argstr: str, i: int, res): - # print("subject in object") - j = self.subject(argstr, i, res) - if j >= 0: - return j - else: - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - else: - i = j - - ch = argstr[i] - if ch in self.string_delimiters: - ch_three = ch * 3 - if argstr[i : i + 3] == ch_three: - delim = ch_three - i += 3 - else: - delim = ch - i += 1 - - j, s = self.strconst(argstr, i, delim) - - res.append(self._store.newLiteral(s)) # type: ignore[call-arg] # TODO FIXME - return j - else: - return -1 - - def nodeOrLiteral(self, argstr: str, i: int, res): - # print("node in nodeOrLiteral") - j = self.node(argstr, i, res) - startline = self.lines # Remember where for error messages - if j >= 0: - return j - else: - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - else: - i = j - - ch = argstr[i] - if ch in numberCharsPlus: - m = exponent_syntax.match(argstr, i) - if m: - j = m.end() - res.append(float(argstr[i:j])) - return j - - m = decimal_syntax.match(argstr, i) - if m: - j = m.end() - res.append(Decimal(argstr[i:j])) - return j - - m = integer_syntax.match(argstr, i) - if m: - j = m.end() - res.append(long_type(argstr[i:j])) - return j - - # return -1 ## or fall through? - - ch_three = ch * 3 - if ch in self.string_delimiters: - if argstr[i : i + 3] == ch_three: - delim = ch_three - i += 3 - else: - delim = ch - i += 1 - - dt = None - j, s = self.strconst(argstr, i, delim) - lang = None - if argstr[j] == "@": # Language? 
- m = langcode.match(argstr, j + 1) - if m is None: - raise BadSyntax( - self._thisDoc, - startline, - argstr, - i, - "Bad language code syntax on string " + "literal, after @", - ) - i = m.end() - lang = argstr[j + 1 : i] - j = i - if argstr[j : j + 2] == "^^": - res2: typing.List[Any] = [] - # print("nodeorLiteral") - j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI - dt = res2[0] - res.append(self._store.newLiteral(s, dt, lang)) - return j - else: - return -1 - - def uriOf(self, sym): - if isinstance(sym, tuple): - return sym[1] # old system for --pipe - # return sym.uriref() # cwm api - return sym - - def strconst(self, argstr: str, i: int, delim): - """parse an N3 string constant delimited by delim. - return index, val - """ - delim1 = delim[0] - delim2, delim3, delim4, delim5 = delim1 * 2, delim1 * 3, delim1 * 4, delim1 * 5 - - j = i - ustr = "" # Empty unicode string - startline = self.lines # Remember where for error messages - len_argstr = len(argstr) - while j < len_argstr: - if argstr[j] == delim1: - if delim == delim1: # done when delim is " or ' - i = j + 1 - return i, ustr - if ( - delim == delim3 - ): # done when delim is """ or ''' and, respectively ... - if argstr[j : j + 5] == delim5: # ... we have "" or '' before - i = j + 5 - ustr += delim2 - return i, ustr - if argstr[j : j + 4] == delim4: # ... we have " or ' before - i = j + 4 - ustr += delim1 - return i, ustr - if argstr[j : j + 3] == delim3: # current " or ' is part of delim - i = j + 3 - return i, ustr - - # we are inside of the string and current char is " or ' - j += 1 - ustr += delim1 - continue - - m = interesting.search(argstr, j) # was argstr[j:]. - # Note for pos param to work, MUST be compiled ... re bug? 
- assert m, "Quote expected in string at ^ in %s^%s" % ( - argstr[j - 20 : j], - argstr[j : j + 20], - ) # at least need a quote - - i = m.start() - try: - ustr += argstr[j:i] - except UnicodeError: - err = "" - for c in argstr[j:i]: - err = err + (" %02x" % ord(c)) - streason = sys.exc_info()[1].__str__() - raise BadSyntax( - self._thisDoc, - startline, - argstr, - j, - "Unicode error appending characters" - + " %s to string, because\n\t%s" % (err, streason), - ) - - # print "@@@ i = ",i, " j=",j, "m.end=", m.end() - - ch = argstr[i] - if ch == delim1: - j = i - continue - elif ch in {'"', "'"} and ch != delim1: - ustr += ch - j = i + 1 - continue - elif ch in {"\r", "\n"}: - if delim == delim1: - raise BadSyntax( - self._thisDoc, - startline, - argstr, - i, - "newline found in string literal", - ) - self.lines += 1 - ustr += ch - j = i + 1 - self.startOfLine = j - - elif ch == "\\": - j = i + 1 - ch = argstr[j] # Will be empty if string ends - if not ch: - raise BadSyntax( - self._thisDoc, - startline, - argstr, - i, - "unterminated string literal (2)", - ) - k = "abfrtvn\\\"'".find(ch) - if k >= 0: - uch = "\a\b\f\r\t\v\n\\\"'"[k] - ustr += uch - j += 1 - elif ch == "u": - j, ch = self.uEscape(argstr, j + 1, startline) - ustr += ch - elif ch == "U": - j, ch = self.UEscape(argstr, j + 1, startline) - ustr += ch - else: - self.BadSyntax(argstr, i, "bad escape") - - self.BadSyntax(argstr, i, "unterminated string literal") - - def _unicodeEscape(self, argstr: str, i, startline, reg, n, prefix): - if len(argstr) < i + n: - raise BadSyntax( - self._thisDoc, startline, argstr, i, "unterminated string literal(3)" - ) - try: - return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n]) - except: - raise BadSyntax( - self._thisDoc, - startline, - argstr, - i, - "bad string literal hex escape: " + argstr[i : i + n], - ) - - def uEscape(self, argstr: str, i, startline): - return self._unicodeEscape(argstr, i, startline, unicodeEscape4, 4, "u") - - def 
UEscape(self, argstr: str, i, startline): - return self._unicodeEscape(argstr, i, startline, unicodeEscape8, 8, "U") - - def BadSyntax(self, argstr: str, i, msg): - raise BadSyntax(self._thisDoc, self.lines, argstr, i, msg) - - -# If we are going to do operators then they should generate -# [ is operator:plus of ( \1 \2 ) ] - - -class BadSyntax(SyntaxError): - def __init__(self, uri, lines, argstr, i, why): - self._str = argstr.encode("utf-8") # Better go back to strings for errors - self._i = i - self._why = why - self.lines = lines - self._uri = uri - - def __str__(self): - argstr = self._str - i = self._i - st = 0 - if i > 60: - pre = "..." - st = i - 60 - else: - pre = "" - if len(argstr) - i > 60: - post = "..." - else: - post = "" - - return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' % ( - self.lines + 1, - self._uri, - self._why, - pre, - argstr[st:i], - argstr[i : i + 60], - post, - ) - - @property - def message(self): - return str(self) - - -############################################################################### -class Formula(object): - number = 0 - - def __init__(self, parent): - self.uuid = uuid4().hex - self.counter = 0 - Formula.number += 1 - self.number = Formula.number - self.existentials = {} - self.universals = {} - - self.quotedgraph = QuotedGraph(store=parent.store, identifier=self.id()) - - def __str__(self): - return "_:Formula%s" % self.number - - def id(self): - return BNode("_:Formula%s" % self.number) - - def newBlankNode(self, uri=None, why=None): - # print("newBlankNode in Formula") - if uri is None: - self.counter += 1 - bn = BNode("f%sb%s" % (self.uuid, self.counter)) - else: - bn = BNode(uri.split("#").pop().replace("_", "b")) - return bn - - def newRdfstarTriple(self, hashvalue, uri=None, why=None): - # print("newRdfstarTriple in Formula") - if uri is None: - # self.counter += 1 - rdfstartriple = RdfstarTriple(hashvalue = hashvalue) - else: - rdfstartriple = RdfstarTriple(hashvalue = hashvalue) - return 
rdfstartriple - - def newUniversal(self, uri, why=None): - return Variable(uri.split("#").pop()) - - def declareExistential(self, x): - self.existentials[x] = self.newBlankNode() - - def close(self): - - return self.quotedgraph - - -r_hibyte = re.compile(r"([\x80-\xff])") - - -class RDFSink(object): - def __init__(self, graph: Graph): - self.rootFormula: Optional[Formula] = None - self.uuid = uuid4().hex - self.counter = 0 - self.graph = graph - - def newFormula(self) -> Formula: - fa = getattr(self.graph.store, "formula_aware", False) - if not fa: - raise ParserError( - "Cannot create formula parser with non-formula-aware store." - ) - f = Formula(self.graph) - return f - - def newGraph(self, identifier: Identifier) -> Graph: - return Graph(self.graph.store, identifier) - - def newSymbol(self, *args: str): - return URIRef(args[0]) - - def newBlankNode( - self, - arg: Optional[Union[Formula, Graph, Any]] = None, - uri: Optional[str] = None, - why: Optional[Callable[[], None]] = None, - ) -> BNode: - # print("newBlankNode in RDFSink") - if isinstance(arg, Formula): - # print("newBlankNode in Formula", arg, uri) - return arg.newBlankNode(uri) - elif isinstance(arg, Graph) or arg is None: - # print("newBlankNode in RDFSink Graph", arg, uri, self.uuid, self.counter,"n%sb%s" % (self.uuid, self.counter)) - self.counter += 1 - bn = BNode("n%sb%s" % (self.uuid, self.counter)) - else: - # print("testsv24", arg, uri, str(arg[0]).split("#").pop().replace("_", "b")) - bn = BNode(str(arg[0]).split("#").pop().replace("_", "b")) - return bn - - def newRdfstarTriple( - self, - # hashvalue: Optional[str], - # arg: Optional[Union[Formula, Graph, Any]] = None, - # uri: Optional[str] = None, - arg: Optional[Union[Formula, Graph, Any]] = None, - uri: Optional[str] = None, - why: Optional[Callable[[], None]] = None, - hashvalue: Optional[str] = None - ) -> RdfstarTriple: - # print("newRdflibRdfstartriple in Formula") - if isinstance(arg, Formula): - # print("testsv2", arg, uri) - return 
arg.newRdfstarTriple(hashvalue = hashvalue) - elif isinstance(arg, Graph) or arg is None: - # print("newRdflibRdfstartriple", hashvalue) - # self.counter += 1 - rdfstartriple = RdfstarTriple(hashvalue =hashvalue) - else: - # print("newRdflibRdfstartriple",hashvalue) - # print("testsv24", arg, uri, str(arg[0]).split("#").pop().replace("_", "rdfstartriple")) - rdfstartriple = RdfstarTriple(hashvalue =hashvalue) - return rdfstartriple - - def newLiteral(self, s: str, dt: Optional[URIRef], lang: Optional[str]) -> Literal: - if dt: - return Literal(s, datatype=dt) - else: - return Literal(s, lang=lang) - - def newList(self, n: typing.List[Any], f: Optional[Formula]): - # print("testnewlist") - nil = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") - if not n: - return nil - - first = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#first") - rest = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest") - af = a = self.newBlankNode(f) - - for ne in n[:-1]: - self.makeStatement((f, first, a, ne)) - an = self.newBlankNode(f) - self.makeStatement((f, rest, a, an)) - a = an - self.makeStatement((f, first, a, n[-1])) - self.makeStatement((f, rest, a, nil)) - return af - - def newSet(self, *args): - return set(args) - - def setDefaultNamespace(self, *args) -> str: - return ":".join(repr(n) for n in args) - - def makeStatement(self, quadruple, why=None) -> None: - # print("testmakeStatement", quadruple) - f, p, s, o = quadruple - - if hasattr(p, "formula"): - raise ParserError("Formula used as predicate") - - s = self.normalise(f, s) - p = self.normalise(f, p) - o = self.normalise(f, o) - if f == self.rootFormula: - # print s, p, o, '.' 
- self.graph.add((s, p, o)) - elif isinstance(f, Formula): - # print("quotedgraph added") - f.quotedgraph.add((s, p, o)) - else: - f.add((s, p, o)) - - # return str(quadruple) - - def makerdfstarStatement(self, quadruple, why=None) -> None: - # print("testmakeStatement", quadruple) - f, hashnode, p, s, o = quadruple - - if hasattr(p, "formula"): - raise ParserError("Formula used as predicate") - - s = self.normalise(f, s) - p = self.normalise(f, p) - o = self.normalise(f, o) - # print("testmakerdfstarStatement", hashnode, s,p,o) - if f == self.rootFormula: - # print s, p, o, '.' - self.graph.addStarTriple((hashnode, s, p, o)) - elif isinstance(f, Formula): - f.quotedgraph.addStarTriple((hashnode, s, p, o)) - else: - f.addStarTriple((hashnode, s, p, o)) - - # return str(quadruple) - - def normalise(self, f: Optional[Formula], n): - if isinstance(n, tuple): - return URIRef(str(n[1])) - - if isinstance(n, bool): - s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE) - return s - - if isinstance(n, int) or isinstance(n, long_type): - s = Literal(str(n), datatype=INTEGER_DATATYPE) - return s - - if isinstance(n, Decimal): - value = str(n) - if value == "-0": - value = "0" - s = Literal(value, datatype=DECIMAL_DATATYPE) - return s - - if isinstance(n, float): - s = Literal(str(n), datatype=DOUBLE_DATATYPE) - return s - - if isinstance(f, Formula): - if n in f.existentials: - return f.existentials[n] - - # if isinstance(n, Var): - # if f.universals.has_key(n): - # return f.universals[n] - # f.universals[n] = f.newBlankNode() - # return f.universals[n] - - return n - - def intern(self, something: AnyT) -> AnyT: - return something - - def bind(self, pfx, uri): - pass # print pfx, ':', uri - - def startDoc(self, formula: Optional[Formula]): - self.rootFormula = formula - - def endDoc(self, formula: Optional[Formula]) -> None: - pass - - -################################################### -# -# Utilities -# - - -def hexify(ustr): - """Use URL encoding to return an ASCII 
string - corresponding to the given UTF8 string - >>> hexify("http://example/a b") - b'http://example/a%20b' - """ - # s1=ustr.encode('utf-8') - s = "" - for ch in ustr: # .encode('utf-8'): - if ord(ch) > 126 or ord(ch) < 33: - ch = "%%%02X" % ord(ch) - else: - ch = "%c" % ord(ch) - s = s + ch - return s.encode("latin-1") - -class TrigSinkParser(SinkParser): +class TrigSinkParser(StarsinkParser): def directiveOrStatement(self, argstr, h): # import pdb; pdb.set_trace() @@ -2679,9 +680,7 @@ def labelOrSubject(self, argstr, i, res): def graph(self, argstr, i): """ Parse trig graph, i.e. - = { .. triples .. } - return -1 if it doesn't look like a graph-decl raise Exception if it looks like a graph, but isn't. """ @@ -2774,26 +773,13 @@ def parse( # TODO: update N3Processor so that it can use conj_graph as the sink conj_graph.namespace_manager = graph.namespace_manager - sink = RDFSink(conj_graph) + sink = StarRDFSink(conj_graph) baseURI = conj_graph.absolutize( source.getPublicId() or source.getSystemId() or "" ) p = TrigSinkParser(sink, baseURI=baseURI, turtle=True) - # return ??? 
- # sink = RDFSink(graph) - - # baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "") - # p = SinkParser(sink, baseURI=baseURI, turtle=turtle) - # N3 parser prefers str stream - # stream = source.getCharacterStream() - # if not stream: - # stream = source.getByteStream() - # p.loadStream(stream) - - # print("tests", source) - if hasattr(source, "file"): f = open(source.file.name, "rb") rdbytes = f.read() @@ -2805,7 +791,10 @@ def parse( f.close() bp = rdbytes.decode("utf-8") - ou = RDFstarParsings(bp) + if "<<" or "{|" in bp: + ou = RDFstarParsings(bp) + else: + ou = bp # print(ou) p.feed(ou) p.endDoc() diff --git a/rdflib/plugins/parsers/turtlestar.py b/rdflib/plugins/parsers/turtlestar.py index 584583244..3e61221dc 100644 --- a/rdflib/plugins/parsers/turtlestar.py +++ b/rdflib/plugins/parsers/turtlestar.py @@ -1,42 +1,15 @@ -#!/usr/bin/env python -""" -notation3.py - Standalone Notation3 Parser -Derived from CWM, the Closed World Machine -Authors of the original suite: -* Dan Connolly <@@> -* Tim Berners-Lee <@@> -* Yosi Scharf <@@> -* Joseph M. Reagle Jr. -* Rich Salz -http://www.w3.org/2000/10/swap/notation3.py -Copyright 2000-2007, World Wide Web Consortium. -Copyright 2001, MIT. -Copyright 2001, Zolera Systems Inc. -License: W3C Software License -http://www.w3.org/Consortium/Legal/copyright-software -Modified by Sean B. Palmer -Copyright 2007, Sean B. Palmer. -Modified to work with rdflib by Gunnar Aastrand Grimnes -Copyright 2010, Gunnar A. 
Grimnes -""" -import codecs -import os -import re -from smtplib import quotedata -import sys + import rdflib # importing typing for `typing.List` because `List`` is used for something else import typing -from decimal import Decimal from typing import IO, TYPE_CHECKING, Any, Callable, Dict, Optional, TypeVar, Union from uuid import uuid4 +from .notation3 import SinkParser, RDFSink, runNamespace -from rdflib.compat import long_type from rdflib.exceptions import ParserError from rdflib.graph import ConjunctiveGraph, Graph, QuotedGraph from rdflib.term import ( - _XSD_PFX, RdfstarTriple, BNode, Identifier, @@ -66,185 +39,7 @@ AnyT = TypeVar("AnyT") - -def splitFragP(uriref, punct=0): - """split a URI reference before the fragment - Punctuation is kept. - e.g. - >>> splitFragP("abc#def") - ('abc', '#def') - >>> splitFragP("abcdef") - ('abcdef', '') - """ - - i = uriref.rfind("#") - if i >= 0: - return uriref[:i], uriref[i:] - else: - return uriref, "" - - -def join(here, there): - """join an absolute URI and URI reference - (non-ascii characters are supported/doctested; - haven't checked the details of the IRI spec though) - ``here`` is assumed to be absolute. - ``there`` is URI reference. - >>> join('http://example/x/y/z', '../abc') - 'http://example/x/abc' - Raise ValueError if there uses relative path - syntax but here has no hierarchical path. - >>> join('mid:foo@example', '../foo') # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - raise ValueError(here) - ValueError: Base has no slash - after colon - with relative '../foo'. - >>> join('http://example/x/y/z', '') - 'http://example/x/y/z' - >>> join('mid:foo@example', '#foo') - 'mid:foo@example#foo' - We grok IRIs - >>> len(u'Andr\\xe9') - 5 - >>> join('http://example.org/', u'#Andr\\xe9') - u'http://example.org/#Andr\\xe9' - """ - - # assert(here.find("#") < 0), \ - # "Base may not contain hash: '%s'" % here # why must caller splitFrag? 
- - slashl = there.find("/") - colonl = there.find(":") - - # join(base, 'foo:/') -- absolute - if colonl >= 0 and (slashl < 0 or colonl < slashl): - return there - - bcolonl = here.find(":") - assert bcolonl >= 0, ( - "Base uri '%s' is not absolute" % here - ) # else it's not absolute - - path, frag = splitFragP(there) - if not path: - return here + frag - - # join('mid:foo@example', '../foo') bzzt - if here[bcolonl + 1 : bcolonl + 2] != "/": - raise ValueError( - "Base <%s> has no slash after " - "colon - with relative '%s'." % (here, there) - ) - - if here[bcolonl + 1 : bcolonl + 3] == "//": - bpath = here.find("/", bcolonl + 3) - else: - bpath = bcolonl + 1 - - # join('http://xyz', 'foo') - if bpath < 0: - bpath = len(here) - here = here + "/" - - # join('http://xyz/', '//abc') => 'http://abc' - if there[:2] == "//": - return here[: bcolonl + 1] + there - - # join('http://xyz/', '/abc') => 'http://xyz/abc' - if there[:1] == "/": - return here[:bpath] + there - - slashr = here.rfind("/") - - while 1: - if path[:2] == "./": - path = path[2:] - if path == ".": - path = "" - elif path[:3] == "../" or path == "..": - path = path[3:] - i = here.rfind("/", bpath, slashr) - if i >= 0: - here = here[: i + 1] - slashr = i - else: - break - - return here[: slashr + 1] + path + frag - - -def base(): - """The base URI for this process - the Web equiv of cwd - Relative or absolute unix-standard filenames parsed relative to - this yield the URI of the file. 
- If we had a reliable way of getting a computer name, - we should put it in the hostname just to prevent ambiguity - """ - # return "file://" + hostname + os.getcwd() + "/" - return "file://" + _fixslash(os.getcwd()) + "/" - - -def _fixslash(s): - """Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" - s = s.replace("\\", "/") - if s[0] != "/" and s[1] == ":": - s = s[2:] # @@@ Hack when drive letter present - return s - - -CONTEXT = 0 -PRED = 1 -SUBJ = 2 -OBJ = 3 - -PARTS = PRED, SUBJ, OBJ -ALL4 = CONTEXT, PRED, SUBJ, OBJ - -SYMBOL = 0 -FORMULA = 1 -LITERAL = 2 -LITERAL_DT = 21 -LITERAL_LANG = 22 -ANONYMOUS = 3 -XMLLITERAL = 25 - -Logic_NS = "http://www.w3.org/2000/10/swap/log#" -NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging -forSomeSym = Logic_NS + "forSome" -forAllSym = Logic_NS + "forAll" - -RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" -RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" -OWL_NS = "http://www.w3.org/2002/07/owl#" -DAML_sameAs_URI = OWL_NS + "sameAs" -parsesTo_URI = Logic_NS + "parsesTo" -RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/" - -List_NS = RDF_NS_URI # From 20030808 -_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#" - -N3_first = (SYMBOL, List_NS + "first") -N3_rest = (SYMBOL, List_NS + "rest") -N3_li = (SYMBOL, List_NS + "li") -N3_nil = (SYMBOL, List_NS + "nil") -N3_List = (SYMBOL, List_NS + "List") -N3_Empty = (SYMBOL, List_NS + "Empty") - - -runNamespaceValue = None - - -def runNamespace(): - """Returns a URI suitable as a namespace for run-local objects""" - # @@@ include hostname (privacy?) (hash it?) 
- global runNamespaceValue - if runNamespaceValue is None: - runNamespaceValue = join(base(), _unique_id()) + "#" - return runNamespaceValue - - -nextu = 0 - +nextu=0 import re import lark import hashlib @@ -400,7 +195,6 @@ def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str iri: IRIREF | prefixed_name prefixed_name: PNAME_LN | PNAME_NS blank_node: BLANK_NODE_LABEL | ANON - BASE_DIRECTIVE: "@base" IRIREF: "<" (/[^\x00-\x20<>"{}|^`\\]/ | UCHAR)* ">" PNAME_NS: PN_PREFIX? ":" @@ -429,7 +223,6 @@ def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str PERCENT: "%" HEX~2 HEX: /[0-9A-Fa-f]/ PN_LOCAL_ESC: "\\" /[_~\.\-!$&'()*+,;=\/?#@%]/ - %ignore WS COMMENT: "#" /[^\n]/* %ignore COMMENT @@ -527,7 +320,6 @@ def compoundanno(self, var): for x in var.children[1].children: test = Reconstructorv2(turtle_lark).reconstruct(x) - if "{|" in test: test123 = test.split("{|",1) @@ -536,18 +328,19 @@ def compoundanno(self, var): test123.pop(0) test_annotation = "{|"+ "".join(test123) + result = annotation_dict[test_annotation] - if not tri2 in annotation_dict: - annotation_dict[tri2] = [object,result] - else: - annotation_dict[tri2].append(object) - annotation_dict[tri2].append(result) + appends1.append(object) + appends1.append(result) else: - if not tri2 in annotation_dict: - annotation_dict[tri2] = [test] - else: - annotation_dict[tri2].append(test) + appends1.append(test) + + if not tri2 in annotation_dict: + annotation_dict[tri2] = appends1 + elif not appends1 == annotation_dict[tri2]: + for x in appends1: + annotation_dict[tri2].append(x) class FindVariables(Visitor): def __init__(self): @@ -909,74 +702,9 @@ def becauseSubexpression(*args, **kargs): # print args, kargs pass - -N3_forSome_URI = forSomeSym -N3_forAll_URI = forAllSym - -# Magic resources we know about - -ADDED_HASH = "#" # Stop where we use this in case we want to remove it! 
-# This is the hash on namespace URIs - -RDF_type = (SYMBOL, RDF_type_URI) -DAML_sameAs = (SYMBOL, DAML_sameAs_URI) - -LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies" - -BOOLEAN_DATATYPE = _XSD_PFX + "boolean" -DECIMAL_DATATYPE = _XSD_PFX + "decimal" -DOUBLE_DATATYPE = _XSD_PFX + "double" -FLOAT_DATATYPE = _XSD_PFX + "float" -INTEGER_DATATYPE = _XSD_PFX + "integer" - -option_noregen = 0 # If set, do not regenerate genids on output - -# @@ I18n - the notname chars need extending for well known unicode non-text -# characters. The XML spec switched to assuming unknown things were name -# characters. -# _namechars = string.lowercase + string.uppercase + string.digits + '_-' -_notQNameChars = set("\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~") # else valid qname :-/ -_notKeywordsChars = _notQNameChars | {"."} -_notNameChars = _notQNameChars | {":"} # Assume anything else valid name :-/ -_rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" - -hexChars = set("ABCDEFabcdef0123456789") -escapeChars = set("(_~.-!$&'()*+,;=/?#@%)") # valid for \ escapes in localnames -numberChars = set("0123456789-") -numberCharsPlus = numberChars | {"+", "."} - - -def unicodeExpand(m): - try: - return chr(int(m.group(1), 16)) - except: - raise Exception("Invalid unicode code point: " + m.group(1)) - - -unicodeEscape4 = re.compile(r"\\u([0-9a-fA-F]{4})") -unicodeEscape8 = re.compile(r"\\U([0-9a-fA-F]{8})") - - -N3CommentCharacter = "#" # For unix script # ! compatibility - -# Parse string to sink -# -# Regular expressions: -eol = re.compile(r"[ \t]*(#[^\n]*)?\r?\n") # end of line, poss. w/comment -eof = re.compile(r"[ \t]*(#[^\n]*)?$") # end of file, poss. 
w/comment -ws = re.compile(r"[ \t]*") # Whitespace not including NL -signed_integer = re.compile(r"[-+]?[0-9]+") # integer -integer_syntax = re.compile(r"[-+]?[0-9]+") -decimal_syntax = re.compile(r"[-+]?[0-9]*\.[0-9]+") -exponent_syntax = re.compile( - r"[-+]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)(?:e|E)[-+]?[0-9]+" -) -digitstring = re.compile(r"[0-9]+") # Unsigned integer -interesting = re.compile(r"""[\\\r\n\"\']""") -langcode = re.compile(r"[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*") - quoted_triple_list = [] -class SinkParser: + +class StarsinkParser(SinkParser): def __init__( self, store: "RDFSink", @@ -1055,448 +783,11 @@ def __init__( self._context = self._formula self._parentContext: Optional[Formula] = None - def here(self, i: int) -> str: - """String generated from position in file - This is for repeatability when referring people to bnodes in a document. - This has diagnostic uses less formally, as it should point one to which - bnode the arbitrary identifier actually is. It gives the - line and character number of the '[' charcacter or path character - which introduced the blank node. The first blank node is boringly - _L1C1. It used to be used only for tracking, but for tests in general - it makes the canonical ordering of bnodes repeatable.""" - - return "%s_L%iC%i" % (self._genPrefix, self.lines, i - self.startOfLine + 1) - - def formula(self): - return self._formula - - def loadStream(self, stream: Union[IO[str], IO[bytes]]) -> Optional["Formula"]: - return self.loadBuf(stream.read()) # Not ideal - - def loadBuf(self, buf: Union[str, bytes]): - """Parses a buffer and returns its top level formula""" - self.startDoc() - - self.feed(buf) - return self.endDoc() # self._formula - - def feed(self, octets: Union[str, bytes]): - """Feed an octet stream to the parser - if BadSyntax is raised, the string - passed in the exception object is the - remainder after any statements have been parsed. 
- So if there is more data to feed to the - parser, it should be straightforward to recover.""" - - if not isinstance(octets, str): - s = octets.decode("utf-8") - # NB already decoded, so \ufeff - if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode("utf-8"): - s = s[1:] - else: - s = octets - - i = 0 - while i >= 0: - j = self.skipSpace(s, i) - if j < 0: - return - i = self.directiveOrStatement(s, j) - if i < 0: - # print("# next char: %s" % s[j-5:j+5]) - # print("asdadasd", i, j) - self.BadSyntax(s, j, "expected directive or statement") - - def directiveOrStatement(self, argstr: str, h: int) -> int: - - i = self.skipSpace(argstr, h) - if i < 0: - return i # EOF - - if self.turtle: - j = self.sparqlDirective(argstr, i) - if j >= 0: - return j - - j = self.directive(argstr, i) - if j >= 0: - return self.checkDot(argstr, j) - j = self.statement(argstr, i) - if j >= 0: - return self.checkDot(argstr, j) - - return j - - # @@I18N - # _namechars = string.lowercase + string.uppercase + string.digits + '_-' - - def tok(self, tok: str, argstr: str, i: int, colon: bool = False): - """Check for keyword. Space must have been stripped on entry and - we must not be at end of file. - if colon, then keyword followed by colon is ok - (@prefix: is ok, rdf:type shortcut a must be followed by ws) - """ - - assert tok[0] not in _notNameChars # not for punctuation - if argstr[i] == "@": - i += 1 - else: - if tok not in self.keywords: - return -1 # No, this has neither keywords declaration nor "@" - - i_plus_len_tok = i + len(tok) - if ( - argstr[i:i_plus_len_tok] == tok - and (argstr[i_plus_len_tok] in _notKeywordsChars) - or (colon and argstr[i_plus_len_tok] == ":") - ): - return i_plus_len_tok - else: - return -1 - - def sparqlTok(self, tok: str, argstr: str, i: int) -> int: - """Check for SPARQL keyword. Space must have been stripped on entry - and we must not be at end of file. 
- Case insensitive and not preceded by @ - """ - - assert tok[0] not in _notNameChars # not for punctuation - - len_tok = len(tok) - if argstr[i : i + len_tok].lower() == tok.lower() and ( - argstr[i + len_tok] in _notQNameChars - ): - i += len_tok - return i - else: - return -1 - - def directive(self, argstr: str, i: int) -> int: - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - res: typing.List[Any] = [] - - j = self.tok("bind", argstr, i) # implied "#". Obsolete. - if j > 0: - self.BadSyntax(argstr, i, "keyword bind is obsolete: use @prefix") - - j = self.tok("keywords", argstr, i) - if j > 0: - if self.turtle: - self.BadSyntax(argstr, i, "Found 'keywords' when in Turtle mode.") - - i = self.commaSeparatedList(argstr, j, res, self.bareWord) - if i < 0: - self.BadSyntax( - argstr, i, "'@keywords' needs comma separated list of words" - ) - self.setKeywords(res[:]) - return i - - j = self.tok("forAll", argstr, i) - if j > 0: - if self.turtle: - self.BadSyntax(argstr, i, "Found 'forAll' when in Turtle mode.") - - i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) - if i < 0: - self.BadSyntax(argstr, i, "Bad variable list after @forAll") - for x in res: - # self._context.declareUniversal(x) - if x not in self._variables or x in self._parentVariables: - self._variables[x] = self._context.newUniversal(x) - return i - - j = self.tok("forSome", argstr, i) - if j > 0: - if self.turtle: - self.BadSyntax(argstr, i, "Found 'forSome' when in Turtle mode.") - - i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) - if i < 0: - self.BadSyntax(argstr, i, "Bad variable list after @forSome") - for x in res: - self._context.declareExistential(x) - return i - - j = self.tok("prefix", argstr, i, colon=True) # no implied "#" - if j >= 0: - t: typing.List[Any] = [] - i = self.qname(argstr, j, t) - if i < 0: - self.BadSyntax(argstr, j, "expected qname after @prefix") - j = self.uri_ref2(argstr, i, t) - if j < 0: - self.BadSyntax(argstr, i, "expected after 
@prefix _qname_") - ns = self.uriOf(t[1]) - - if self._baseURI: - ns = join(self._baseURI, ns) - elif ":" not in ns: - self.BadSyntax( - argstr, - j, - f"With no base URI, cannot use relative URI in @prefix <{ns}>", - ) - assert ":" in ns # must be absolute - self._bindings[t[0][0]] = ns - self.bind(t[0][0], hexify(ns)) - return j - - j = self.tok("base", argstr, i) # Added 2007/7/7 - if j >= 0: - t = [] - i = self.uri_ref2(argstr, j, t) - if i < 0: - self.BadSyntax(argstr, j, "expected after @base ") - ns = self.uriOf(t[0]) - - if self._baseURI: - ns = join(self._baseURI, ns) - else: - self.BadSyntax( - argstr, - j, - "With no previous base URI, cannot use " - + "relative URI in @base <" - + ns - + ">", - ) - assert ":" in ns # must be absolute - self._baseURI = ns - return i - - return -1 # Not a directive, could be something else. - - def sparqlDirective(self, argstr: str, i: int): - - """ - turtle and trig support BASE/PREFIX without @ and without - terminating . - """ - - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - - j = self.sparqlTok("PREFIX", argstr, i) - if j >= 0: - t: typing.List[Any] = [] - i = self.qname(argstr, j, t) - if i < 0: - self.BadSyntax(argstr, j, "expected qname after @prefix") - j = self.uri_ref2(argstr, i, t) - if j < 0: - self.BadSyntax(argstr, i, "expected after @prefix _qname_") - ns = self.uriOf(t[1]) - - if self._baseURI: - ns = join(self._baseURI, ns) - elif ":" not in ns: - self.BadSyntax( - argstr, - j, - "With no base URI, cannot use " - + "relative URI in @prefix <" - + ns - + ">", - ) - assert ":" in ns # must be absolute - self._bindings[t[0][0]] = ns - self.bind(t[0][0], hexify(ns)) - return j - - j = self.sparqlTok("BASE", argstr, i) - if j >= 0: - t = [] - i = self.uri_ref2(argstr, j, t) - if i < 0: - self.BadSyntax(argstr, j, "expected after @base ") - ns = self.uriOf(t[0]) - - if self._baseURI: - ns = join(self._baseURI, ns) - else: - self.BadSyntax( - argstr, - j, - "With no previous base URI, cannot use 
" - + "relative URI in @base <" - + ns - + ">", - ) - assert ":" in ns # must be absolute - self._baseURI = ns - return i - - return -1 # Not a directive, could be something else. - - def bind(self, qn: str, uri: bytes) -> None: - assert isinstance(uri, bytes), "Any unicode must be %x-encoded already" - if qn == "": - self._store.setDefaultNamespace(uri) - else: - self._store.bind(qn, uri) - - def setKeywords(self, k: Optional[typing.List[str]]): - """Takes a list of strings""" - if k is None: - self.keywordsSet = 0 - else: - self.keywords = k - self.keywordsSet = 1 - - def startDoc(self) -> None: - # was: self._store.startDoc() - self._store.startDoc(self._formula) - - def endDoc(self) -> Optional["Formula"]: - """Signal end of document and stop parsing. returns formula""" - self._store.endDoc(self._formula) # don't canonicalize yet - return self._formula - - def makeStatement(self, quadruple): - # $$$$$$$$$$$$$$$$$$$$$ - # print "# Parser output: ", `quadruple` - self._store.makeStatement(quadruple, why=self._reason2) - def makerdfstarStatement(self, quadruple): # $$$$$$$$$$$$$$$$$$$$$ # print "# Parser output: ", `quadruple` self._store.makerdfstarStatement(quadruple, why=self._reason2) - def statement(self, argstr: str, i: int) -> int: - r: typing.List[Any] = [] - i = self.object(argstr, i, r) # Allow literal for subject - extends RDF - if i < 0: - return i - - j = self.property_list(argstr, i, r[0]) - - if j < 0: - self.BadSyntax(argstr, i, "expected propertylist") - return j - - def subject(self, argstr: str, i: int, res: typing.List[Any]) -> int: - return self.item(argstr, i, res) - - def verb(self, argstr: str, i: int, res: typing.List[Any]) -> int: - """has _prop_ - is _prop_ of - a - = - _prop_ - >- prop -> - <- prop -< - _operator_""" - - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - - r: typing.List[Any] = [] - - j = self.tok("has", argstr, i) - if j >= 0: - if self.turtle: - self.BadSyntax(argstr, i, "Found 'has' keyword in Turtle mode") 
- - i = self.prop(argstr, j, r) - if i < 0: - self.BadSyntax(argstr, j, "expected property after 'has'") - res.append(("->", r[0])) - return i - - j = self.tok("is", argstr, i) - if j >= 0: - if self.turtle: - self.BadSyntax(argstr, i, "Found 'is' keyword in Turtle mode") - - i = self.prop(argstr, j, r) - if i < 0: - self.BadSyntax(argstr, j, "expected after 'is'") - j = self.skipSpace(argstr, i) - if j < 0: - self.BadSyntax( - argstr, i, "End of file found, expected property after 'is'" - ) - i = j - j = self.tok("of", argstr, i) - if j < 0: - self.BadSyntax(argstr, i, "expected 'of' after 'is' ") - res.append(("<-", r[0])) - return j - - j = self.tok("a", argstr, i) - if j >= 0: - res.append(("->", RDF_type)) - return j - - if argstr[i : i + 2] == "<=": - if self.turtle: - self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. ") - - res.append(("<-", self._store.newSymbol(Logic_NS + "implies"))) - return i + 2 - - if argstr[i] == "=": - if self.turtle: - self.BadSyntax(argstr, i, "Found '=' in Turtle mode") - if argstr[i + 1] == ">": - res.append(("->", self._store.newSymbol(Logic_NS + "implies"))) - return i + 2 - res.append(("->", DAML_sameAs)) - return i + 1 - - if argstr[i : i + 2] == ":=": - if self.turtle: - self.BadSyntax(argstr, i, "Found ':=' in Turtle mode") - - # patch file relates two formulae, uses this @@ really? - res.append(("->", Logic_NS + "becomes")) - return i + 2 - - j = self.prop(argstr, i, r) - if j >= 0: - res.append(("->", r[0])) - return j - - if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-": - self.BadSyntax(argstr, j, ">- ... 
-> syntax is obsolete.") - - return -1 - - def prop(self, argstr: str, i: int, res): - return self.item(argstr, i, res) - - def item(self, argstr: str, i, res): - return self.path(argstr, i, res) - - def blankNode(self, uri=None): - return self._store.newBlankNode(self._context, uri, why=self._reason2) - - def path(self, argstr: str, i: int, res): - """Parse the path production.""" - j = self.nodeOrLiteral(argstr, i, res) - if j < 0: - return j # nope - - while argstr[j] in {"!", "^"}: # no spaces, must follow exactly (?) - ch = argstr[j] - subj = res.pop() - obj = self.blankNode(uri=self.here(j)) - j = self.node(argstr, j + 1, res) - if j < 0: - self.BadSyntax(argstr, j, "EOF found in middle of path syntax") - pred = res.pop() - if ch == "^": # Reverse traverse - self.makeStatement((self._context, pred, obj, subj)) - else: - self.makeStatement((self._context, pred, subj, obj)) - res.append(obj) - return j - def anonymousNode(self, ln: str): """Remember or generate a term for one of these _: anonymous nodes""" # print("anonymousNode", self._anonymousNodes.get(ln, None), self._context, self._reason2) @@ -1516,187 +807,6 @@ def anonymousNode(self, ln: str): self._anonymousNodes[ln] = term return term - def node(self, argstr: str, i: int, res, subjectAlready=None): - """Parse the production. - Space is now skipped once at the beginning - instead of in multiple calls to self.skipSpace(). - """ - subj = subjectAlready - - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - i = j - ch = argstr[i] # Quick 1-character checks first: - - if ch == "[": - bnodeID = self.here(i) - j = self.skipSpace(argstr, i + 1) - if j < 0: - self.BadSyntax(argstr, i, "EOF after '['") - # Hack for "is" binding name to anon node - if argstr[j] == "=": - if self.turtle: - self.BadSyntax( - argstr, j, "Found '[=' or '[ =' when in turtle mode." 
- ) - i = j + 1 - objs: typing.List[Any] = [] - j = self.objectList(argstr, i, objs) - if j >= 0: - subj = objs[0] - if len(objs) > 1: - for obj in objs: - self.makeStatement((self._context, DAML_sameAs, subj, obj)) - j = self.skipSpace(argstr, j) - if j < 0: - self.BadSyntax( - argstr, i, "EOF when objectList expected after [ = " - ) - if argstr[j] == ";": - j += 1 - else: - self.BadSyntax(argstr, i, "objectList expected after [= ") - - if subj is None: - subj = self.blankNode(uri=bnodeID) - i = self.property_list(argstr, j, subj) - if i < 0: - self.BadSyntax(argstr, j, "property_list expected") - - j = self.skipSpace(argstr, i) - if j < 0: - self.BadSyntax( - argstr, i, "EOF when ']' expected after [ " - ) - if argstr[j] != "]": - # print("asdadasd", argstr[j-1], argstr[j-2], argstr[j-3], argstr[j-4], argstr[j-5]) - self.BadSyntax(argstr, j, "']' expected") - res.append(subj) - return j + 1 - - if not self.turtle and ch == "{": - # if self.turtle: - # self.BadSyntax(argstr, i, - # "found '{' while in Turtle mode, Formulas not supported!") - ch2 = argstr[i + 1] - if ch2 == "$": - # a set - i += 1 - j = i + 1 - List = [] - first_run = True - while 1: - i = self.skipSpace(argstr, j) - if i < 0: - self.BadSyntax(argstr, i, "needed '$}', found end.") - if argstr[i : i + 2] == "$}": - j = i + 2 - break - - if not first_run: - if argstr[i] == ",": - i += 1 - else: - self.BadSyntax(argstr, i, "expected: ','") - else: - first_run = False - - item: typing.List[Any] = [] - j = self.item(argstr, i, item) # @@@@@ should be path, was object - if j < 0: - self.BadSyntax(argstr, i, "expected item in set or '$}'") - List.append(self._store.intern(item[0])) - res.append(self._store.newSet(List, self._context)) - return j - else: - # parse a formula - j = i + 1 - oldParentContext = self._parentContext - self._parentContext = self._context - parentAnonymousNodes = self._anonymousNodes - grandParentVariables = self._parentVariables - self._parentVariables = self._variables - 
self._anonymousNodes = {} - self._variables = self._variables.copy() - reason2 = self._reason2 - self._reason2 = becauseSubexpression - if subj is None: - subj = self._store.newFormula() - self._context = subj - - while 1: - i = self.skipSpace(argstr, j) - if i < 0: - self.BadSyntax(argstr, i, "needed '}', found end.") - - if argstr[i] == "}": - j = i + 1 - break - j = self.directiveOrStatement(argstr, i) - if j < 0: - self.BadSyntax(argstr, i, "expected statement or '}'") - - self._anonymousNodes = parentAnonymousNodes - self._variables = self._parentVariables - self._parentVariables = grandParentVariables - self._context = self._parentContext - self._reason2 = reason2 - self._parentContext = oldParentContext - res.append(subj.close()) # No use until closed - return j - - if ch == "(": - thing_type = self._store.newList - ch2 = argstr[i + 1] - if ch2 == "$": - thing_type = self._store.newSet - i += 1 - j = i + 1 - - List = [] - while 1: - i = self.skipSpace(argstr, j) - if i < 0: - self.BadSyntax(argstr, i, "needed ')', found end.") - if argstr[i] == ")": - j = i + 1 - break - - item = [] - j = self.item(argstr, i, item) # @@@@@ should be path, was object - if j < 0: - self.BadSyntax(argstr, i, "expected item in list or ')'") - List.append(self._store.intern(item[0])) - res.append(thing_type(List, self._context)) - return j - - j = self.tok("this", argstr, i) # This context - if j >= 0: - self.BadSyntax( - argstr, - i, - "Keyword 'this' was ancient N3. Now use " - + "@forSome and @forAll keywords.", - ) - - # booleans - j = self.tok("true", argstr, i) - if j >= 0: - res.append(True) - return j - j = self.tok("false", argstr, i) - if j >= 0: - res.append(False) - return j - - if subj is None: # If this can be a named node, then check for a name. 
- j = self.uri_ref2(argstr, i, res) - if j >= 0: - return j - - return -1 - def addingquotedRdfstarTriple(self, quoted_triple_list, dira): if quoted_triple_list[0] == rdflib.term.URIRef('https://w3id.org/rdf-star/AssertedStatement'): if quoted_triple_list[1] == rdflib.term.URIRef('https://w3id.org/rdf-star/QuotedStatement'): @@ -1773,15 +883,6 @@ def property_list(self, argstr: str, i: int, subj): if i < 0: self.BadSyntax(argstr, j, "objectList expected") - # assertedtriple = False - # quotedtriple = False - # assertedtriple_s_p_o = [] - # quotedtriple_hashnode_s_p_o = [] - # for obj in objs: - # dira, sym = v[0] - # print("test make statement objsssss", sym, subj, obj) - # if "RdfstarTriple" in subj: - for obj in objs: dira, sym = v[0] if "RdfstarTriple" in subj: @@ -1809,603 +910,6 @@ def property_list(self, argstr: str, i: int, subj): return i i += 1 # skip semicolon and continue - def commaSeparatedList(self, argstr: str, j, res, what): - """return value: -1 bad syntax; >1 new position in argstr - res has things found appended - """ - i = self.skipSpace(argstr, j) - if i < 0: - self.BadSyntax(argstr, i, "EOF found expecting comma sep list") - if argstr[i] == ".": - return j # empty list is OK - i = what(argstr, i, res) - if i < 0: - return -1 - - while 1: - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - ch = argstr[j] - if ch != ",": - if ch != ".": - return -1 - return j # Found but not swallowed "." - i = what(argstr, j + 1, res) - if i < 0: - self.BadSyntax(argstr, i, "bad list content") - - def objectList(self, argstr: str, i: int, res: typing.List[Any]) -> int: - # print("object in objectList") - i = self.object(argstr, i, res) - if i < 0: - return -1 - while 1: - j = self.skipSpace(argstr, i) - if j < 0: - self.BadSyntax(argstr, j, "EOF found after object") - if argstr[j] != ",": - return j # Found something else! 
- i = self.object(argstr, j + 1, res) - if i < 0: - return i - - def checkDot(self, argstr: str, i: int): - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - ch = argstr[j] - if ch == ".": - return j + 1 # skip - if ch == "}": - return j # don't skip it - if ch == "]": - return j - self.BadSyntax(argstr, j, "expected '.' or '}' or ']' at end of statement") - - def uri_ref2(self, argstr: str, i: int, res): - """Generate uri from n3 representation. - Note that the RDF convention of directly concatenating - NS and local name is now used though I prefer inserting a '#' - to make the namesapces look more like what XML folks expect. - """ - qn: typing.List[Any] = [] - j = self.qname(argstr, i, qn) - if j >= 0: - pfx, ln = qn[0] - if pfx is None: - assert 0, "not used?" - ns = self._baseURI + ADDED_HASH # type: ignore[unreachable] - else: - try: - ns = self._bindings[pfx] - except KeyError: - # print("testuri2", pfx, ln) - if pfx == "_": # Magic prefix 2001/05/30, can be changed - # print("anonymousNode in uriref2") - res.append(self.anonymousNode(ln)) - return j - if not self.turtle and pfx == "": - ns = join(self._baseURI or "", "#") - else: - self.BadSyntax(argstr, i, 'Prefix "%s:" not bound' % (pfx)) - symb = self._store.newSymbol(ns + ln) - res.append(self._variables.get(symb, symb)) - return j - - i = self.skipSpace(argstr, i) - if i < 0: - return -1 - - if argstr[i] == "?": - v: typing.List[Any] = [] - j = self.variable(argstr, i, v) - if j > 0: # Forget variables as a class, only in context. 
- res.append(v[0]) - return j - return -1 - - elif argstr[i] == "<": - st = i + 1 - i = argstr.find(">", st) - if i >= 0: - uref = argstr[st:i] # the join should dealt with "": - - # expand unicode escapes - uref = unicodeEscape8.sub(unicodeExpand, uref) - uref = unicodeEscape4.sub(unicodeExpand, uref) - - if self._baseURI: - uref = join(self._baseURI, uref) # was: uripath.join - else: - assert ( - ":" in uref - ), "With no base URI, cannot deal with relative URIs" - if argstr[i - 1] == "#" and not uref[-1:] == "#": - uref += "#" # She meant it! Weirdness in urlparse? - symb = self._store.newSymbol(uref) - res.append(self._variables.get(symb, symb)) - return i + 1 - self.BadSyntax(argstr, j, "unterminated URI reference") - - elif self.keywordsSet: - v = [] - j = self.bareWord(argstr, i, v) - if j < 0: - return -1 # Forget variables as a class, only in context. - if v[0] in self.keywords: - self.BadSyntax(argstr, i, 'Keyword "%s" not allowed here.' % v[0]) - res.append(self._store.newSymbol(self._bindings[""] + v[0])) - return j - else: - return -1 - - def skipSpace(self, argstr: str, i: int): - """Skip white space, newlines and comments. - return -1 if EOF, else position of first non-ws character""" - - # Most common case is a non-commented line starting with few spaces and tabs. 
- try: - while True: - ch = argstr[i] - if ch in {" ", "\t"}: - i += 1 - continue - elif ch not in {"#", "\r", "\n"}: - return i - break - except IndexError: - return -1 - - while 1: - m = eol.match(argstr, i) - if m is None: - break - self.lines += 1 - self.startOfLine = i = m.end() # Point to first character unmatched - m = ws.match(argstr, i) - if m is not None: - i = m.end() - m = eof.match(argstr, i) - return i if m is None else -1 - - def variable(self, argstr: str, i: int, res): - """?abc -> variable(:abc)""" - - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - - if argstr[j] != "?": - return -1 - j += 1 - i = j - if argstr[j] in numberChars: - self.BadSyntax(argstr, j, "Variable name can't start with '%s'" % argstr[j]) - len_argstr = len(argstr) - while i < len_argstr and argstr[i] not in _notKeywordsChars: - i += 1 - if self._parentContext is None: - varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) # type: ignore[operator] - if varURI not in self._variables: - self._variables[varURI] = self._context.newUniversal( - varURI, why=self._reason2 - ) - res.append(self._variables[varURI]) - return i - # @@ was: - # self.BadSyntax(argstr, j, - # "Can't use ?xxx syntax for variable in outermost level: %s" - # % argstr[j-1:i]) - varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) # type: ignore[operator] - if varURI not in self._parentVariables: - self._parentVariables[varURI] = self._parentContext.newUniversal( - varURI, why=self._reason2 - ) - res.append(self._parentVariables[varURI]) - return i - - def bareWord(self, argstr: str, i: int, res): - """abc -> :abc""" - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - - if argstr[j] in numberChars or argstr[j] in _notKeywordsChars: - return -1 - i = j - len_argstr = len(argstr) - while i < len_argstr and argstr[i] not in _notKeywordsChars: - i += 1 - res.append(argstr[j:i]) - return i - - def qname(self, argstr: str, i: int, res): - """ - xyz:def -> ('xyz', 'def') - If not 
in keywords and keywordsSet: def -> ('', 'def') - :def -> ('', 'def') - """ - - i = self.skipSpace(argstr, i) - if i < 0: - return -1 - - c = argstr[i] - if c in numberCharsPlus: - return -1 - len_argstr = len(argstr) - if c not in _notNameChars: - j = i - i += 1 - - try: - while argstr[i] not in _notNameChars: - i += 1 - except IndexError: - pass # Very rare. - - if argstr[i - 1] == ".": # qname cannot end with "." - i -= 1 - if i == j: - return -1 - ln = argstr[j:i] - - else: # First character is non-alpha - ln = "" # Was: None - TBL (why? useful?) - - if i < len_argstr and argstr[i] == ":": - pfx = ln - # bnodes names have different rules - if pfx == "_": - allowedChars = _notNameChars - else: - allowedChars = _notQNameChars - - i += 1 - lastslash = False - start = i - ln = "" - while i < len_argstr: - c = argstr[i] - if c == "\\" and not lastslash: # Very rare. - lastslash = True - if start < i: - ln += argstr[start:i] - start = i + 1 - elif c not in allowedChars or lastslash: # Most common case is "a-zA-Z" - if lastslash: - if c not in escapeChars: - raise BadSyntax( - self._thisDoc, - self.lines, - argstr, - i, - "illegal escape " + c, - ) - elif c == "%": # Very rare. - if ( - argstr[i + 1] not in hexChars - or argstr[i + 2] not in hexChars - ): - raise BadSyntax( - self._thisDoc, - self.lines, - argstr, - i, - "illegal hex escape " + c, - ) - lastslash = False - else: - break - i += 1 - - if lastslash: - raise BadSyntax( - self._thisDoc, self.lines, argstr, i, "qname cannot end with \\" - ) - - if argstr[i - 1] == ".": - # localname cannot end in . 
- if len(ln) == 0 and start == i: - return -1 - i -= 1 - - if start < i: - ln += argstr[start:i] - - res.append((pfx, ln)) - return i - - else: # delimiter was not ":" - if ln and self.keywordsSet and ln not in self.keywords: - res.append(("", ln)) - return i - return -1 - - def object(self, argstr: str, i: int, res): - # print("subject in object") - j = self.subject(argstr, i, res) - if j >= 0: - return j - else: - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - else: - i = j - - ch = argstr[i] - if ch in self.string_delimiters: - ch_three = ch * 3 - if argstr[i : i + 3] == ch_three: - delim = ch_three - i += 3 - else: - delim = ch - i += 1 - - j, s = self.strconst(argstr, i, delim) - - res.append(self._store.newLiteral(s)) # type: ignore[call-arg] # TODO FIXME - return j - else: - return -1 - - def nodeOrLiteral(self, argstr: str, i: int, res): - # print("node in nodeOrLiteral") - j = self.node(argstr, i, res) - startline = self.lines # Remember where for error messages - if j >= 0: - return j - else: - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - else: - i = j - - ch = argstr[i] - if ch in numberCharsPlus: - m = exponent_syntax.match(argstr, i) - if m: - j = m.end() - res.append(float(argstr[i:j])) - return j - - m = decimal_syntax.match(argstr, i) - if m: - j = m.end() - res.append(Decimal(argstr[i:j])) - return j - - m = integer_syntax.match(argstr, i) - if m: - j = m.end() - res.append(long_type(argstr[i:j])) - return j - - # return -1 ## or fall through? - - ch_three = ch * 3 - if ch in self.string_delimiters: - if argstr[i : i + 3] == ch_three: - delim = ch_three - i += 3 - else: - delim = ch - i += 1 - - dt = None - j, s = self.strconst(argstr, i, delim) - lang = None - if argstr[j] == "@": # Language? 
- m = langcode.match(argstr, j + 1) - if m is None: - raise BadSyntax( - self._thisDoc, - startline, - argstr, - i, - "Bad language code syntax on string " + "literal, after @", - ) - i = m.end() - lang = argstr[j + 1 : i] - j = i - if argstr[j : j + 2] == "^^": - res2: typing.List[Any] = [] - # print("nodeorLiteral") - j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI - dt = res2[0] - res.append(self._store.newLiteral(s, dt, lang)) - return j - else: - return -1 - - def uriOf(self, sym): - if isinstance(sym, tuple): - return sym[1] # old system for --pipe - # return sym.uriref() # cwm api - return sym - - def strconst(self, argstr: str, i: int, delim): - """parse an N3 string constant delimited by delim. - return index, val - """ - delim1 = delim[0] - delim2, delim3, delim4, delim5 = delim1 * 2, delim1 * 3, delim1 * 4, delim1 * 5 - - j = i - ustr = "" # Empty unicode string - startline = self.lines # Remember where for error messages - len_argstr = len(argstr) - while j < len_argstr: - if argstr[j] == delim1: - if delim == delim1: # done when delim is " or ' - i = j + 1 - return i, ustr - if ( - delim == delim3 - ): # done when delim is """ or ''' and, respectively ... - if argstr[j : j + 5] == delim5: # ... we have "" or '' before - i = j + 5 - ustr += delim2 - return i, ustr - if argstr[j : j + 4] == delim4: # ... we have " or ' before - i = j + 4 - ustr += delim1 - return i, ustr - if argstr[j : j + 3] == delim3: # current " or ' is part of delim - i = j + 3 - return i, ustr - - # we are inside of the string and current char is " or ' - j += 1 - ustr += delim1 - continue - - m = interesting.search(argstr, j) # was argstr[j:]. - # Note for pos param to work, MUST be compiled ... re bug? 
- assert m, "Quote expected in string at ^ in %s^%s" % ( - argstr[j - 20 : j], - argstr[j : j + 20], - ) # at least need a quote - - i = m.start() - try: - ustr += argstr[j:i] - except UnicodeError: - err = "" - for c in argstr[j:i]: - err = err + (" %02x" % ord(c)) - streason = sys.exc_info()[1].__str__() - raise BadSyntax( - self._thisDoc, - startline, - argstr, - j, - "Unicode error appending characters" - + " %s to string, because\n\t%s" % (err, streason), - ) - - # print "@@@ i = ",i, " j=",j, "m.end=", m.end() - - ch = argstr[i] - if ch == delim1: - j = i - continue - elif ch in {'"', "'"} and ch != delim1: - ustr += ch - j = i + 1 - continue - elif ch in {"\r", "\n"}: - if delim == delim1: - raise BadSyntax( - self._thisDoc, - startline, - argstr, - i, - "newline found in string literal", - ) - self.lines += 1 - ustr += ch - j = i + 1 - self.startOfLine = j - - elif ch == "\\": - j = i + 1 - ch = argstr[j] # Will be empty if string ends - if not ch: - raise BadSyntax( - self._thisDoc, - startline, - argstr, - i, - "unterminated string literal (2)", - ) - k = "abfrtvn\\\"'".find(ch) - if k >= 0: - uch = "\a\b\f\r\t\v\n\\\"'"[k] - ustr += uch - j += 1 - elif ch == "u": - j, ch = self.uEscape(argstr, j + 1, startline) - ustr += ch - elif ch == "U": - j, ch = self.UEscape(argstr, j + 1, startline) - ustr += ch - else: - self.BadSyntax(argstr, i, "bad escape") - - self.BadSyntax(argstr, i, "unterminated string literal") - - def _unicodeEscape(self, argstr: str, i, startline, reg, n, prefix): - if len(argstr) < i + n: - raise BadSyntax( - self._thisDoc, startline, argstr, i, "unterminated string literal(3)" - ) - try: - return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n]) - except: - raise BadSyntax( - self._thisDoc, - startline, - argstr, - i, - "bad string literal hex escape: " + argstr[i : i + n], - ) - - def uEscape(self, argstr: str, i, startline): - return self._unicodeEscape(argstr, i, startline, unicodeEscape4, 4, "u") - - def 
UEscape(self, argstr: str, i, startline): - return self._unicodeEscape(argstr, i, startline, unicodeEscape8, 8, "U") - - def BadSyntax(self, argstr: str, i, msg): - raise BadSyntax(self._thisDoc, self.lines, argstr, i, msg) - - -# If we are going to do operators then they should generate -# [ is operator:plus of ( \1 \2 ) ] - - -class BadSyntax(SyntaxError): - def __init__(self, uri, lines, argstr, i, why): - self._str = argstr.encode("utf-8") # Better go back to strings for errors - self._i = i - self._why = why - self.lines = lines - self._uri = uri - - def __str__(self): - argstr = self._str - i = self._i - st = 0 - if i > 60: - pre = "..." - st = i - 60 - else: - pre = "" - if len(argstr) - i > 60: - post = "..." - else: - post = "" - - return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' % ( - self.lines + 1, - self._uri, - self._why, - pre, - argstr[st:i], - argstr[i : i + 60], - post, - ) - - @property - def message(self): - return str(self) - - ############################################################################### class Formula(object): number = 0 @@ -2454,50 +958,7 @@ def close(self): return self.quotedgraph - -r_hibyte = re.compile(r"([\x80-\xff])") - - -class RDFSink(object): - def __init__(self, graph: Graph): - self.rootFormula: Optional[Formula] = None - self.uuid = uuid4().hex - self.counter = 0 - self.graph = graph - - def newFormula(self) -> Formula: - fa = getattr(self.graph.store, "formula_aware", False) - if not fa: - raise ParserError( - "Cannot create formula parser with non-formula-aware store." 
- ) - f = Formula(self.graph) - return f - - def newGraph(self, identifier: Identifier) -> Graph: - return Graph(self.graph.store, identifier) - - def newSymbol(self, *args: str): - return URIRef(args[0]) - - def newBlankNode( - self, - arg: Optional[Union[Formula, Graph, Any]] = None, - uri: Optional[str] = None, - why: Optional[Callable[[], None]] = None, - ) -> BNode: - # print("newBlankNode in RDFSink") - if isinstance(arg, Formula): - # print("newBlankNode in Formula", arg, uri) - return arg.newBlankNode(uri) - elif isinstance(arg, Graph) or arg is None: - # print("newBlankNode in RDFSink Graph", arg, uri, self.uuid, self.counter,"n%sb%s" % (self.uuid, self.counter)) - self.counter += 1 - bn = BNode("n%sb%s" % (self.uuid, self.counter)) - else: - # print("testsv24", arg, uri, str(arg[0]).split("#").pop().replace("_", "b")) - bn = BNode(str(arg[0]).split("#").pop().replace("_", "b")) - return bn +class StarRDFSink(RDFSink): def newRdfstarTriple( self, @@ -2523,58 +984,6 @@ def newRdfstarTriple( rdfstartriple = RdfstarTriple(hashvalue =hashvalue) return rdfstartriple - def newLiteral(self, s: str, dt: Optional[URIRef], lang: Optional[str]) -> Literal: - if dt: - return Literal(s, datatype=dt) - else: - return Literal(s, lang=lang) - - def newList(self, n: typing.List[Any], f: Optional[Formula]): - # print("testnewlist") - nil = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") - if not n: - return nil - - first = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#first") - rest = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest") - af = a = self.newBlankNode(f) - - for ne in n[:-1]: - self.makeStatement((f, first, a, ne)) - an = self.newBlankNode(f) - self.makeStatement((f, rest, a, an)) - a = an - self.makeStatement((f, first, a, n[-1])) - self.makeStatement((f, rest, a, nil)) - return af - - def newSet(self, *args): - return set(args) - - def setDefaultNamespace(self, *args) -> str: - return ":".join(repr(n) for n in args) 
- - def makeStatement(self, quadruple, why=None) -> None: - # print("testmakeStatement", quadruple) - f, p, s, o = quadruple - - if hasattr(p, "formula"): - raise ParserError("Formula used as predicate") - - s = self.normalise(f, s) - p = self.normalise(f, p) - o = self.normalise(f, o) - if f == self.rootFormula: - # print s, p, o, '.' - self.graph.add((s, p, o)) - elif isinstance(f, Formula): - # print("quotedgraph added") - f.quotedgraph.add((s, p, o)) - else: - f.add((s, p, o)) - - # return str(quadruple) - def makerdfstarStatement(self, quadruple, why=None) -> None: # print("testmakeStatement", quadruple) f, hashnode, p, s, o = quadruple @@ -2594,79 +1003,6 @@ def makerdfstarStatement(self, quadruple, why=None) -> None: else: f.addStarTriple((hashnode, s, p, o)) - # return str(quadruple) - - def normalise(self, f: Optional[Formula], n): - if isinstance(n, tuple): - return URIRef(str(n[1])) - - if isinstance(n, bool): - s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE) - return s - - if isinstance(n, int) or isinstance(n, long_type): - s = Literal(str(n), datatype=INTEGER_DATATYPE) - return s - - if isinstance(n, Decimal): - value = str(n) - if value == "-0": - value = "0" - s = Literal(value, datatype=DECIMAL_DATATYPE) - return s - - if isinstance(n, float): - s = Literal(str(n), datatype=DOUBLE_DATATYPE) - return s - - if isinstance(f, Formula): - if n in f.existentials: - return f.existentials[n] - - # if isinstance(n, Var): - # if f.universals.has_key(n): - # return f.universals[n] - # f.universals[n] = f.newBlankNode() - # return f.universals[n] - - return n - - def intern(self, something: AnyT) -> AnyT: - return something - - def bind(self, pfx, uri): - pass # print pfx, ':', uri - - def startDoc(self, formula: Optional[Formula]): - self.rootFormula = formula - - def endDoc(self, formula: Optional[Formula]) -> None: - pass - - -################################################### -# -# Utilities -# - - -def hexify(ustr): - """Use URL encoding to return 
an ASCII string - corresponding to the given UTF8 string - >>> hexify("http://example/a b") - b'http://example/a%20b' - """ - # s1=ustr.encode('utf-8') - s = "" - for ch in ustr: # .encode('utf-8'): - if ord(ch) > 126 or ord(ch) < 33: - ch = "%%%02X" % ord(ch) - else: - ch = "%c" % ord(ch) - s = s + ch - return s.encode("latin-1") - - class TurtleParser(Parser): """ @@ -2689,10 +1025,10 @@ def parse( "N3/Turtle files are always utf-8 encoded, I was passed: %s" % encoding ) - sink = RDFSink(graph) + sink = StarRDFSink(graph) baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "") - p = SinkParser(sink, baseURI=baseURI, turtle=turtle) + p = StarsinkParser(sink, baseURI=baseURI, turtle=turtle) # N3 parser prefers str stream # stream = source.getCharacterStream() # if not stream: @@ -2711,7 +1047,10 @@ def parse( f.close() bp = rdbytes.decode("utf-8") - ou = RDFstarParsings(bp) + if ("<<" in bp) or ("{|" in bp): + ou = RDFstarParsings(bp) + else: + ou = bp # print(ou) p.feed(ou) p.endDoc()