"""Utility functions for representing instances of pydantic models as rdf.

This module uses JSON-LD with a shared context on
https://w3id.org/emmo/domain/oteio/context
"""

import io
import json
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:  # pragma: no cover
    from typing import Any, Optional, TextIO, Union

    import rdflib

# JSON-LD context inserted by `add_resource()` when the input has none.
_DEFAULT_CONTEXT = "https://w3id.org/emmo/domain/oteio/context"


def _infer_format(text: str) -> "Optional[str]":
    """Guess the format of `text` from its first non-blank characters.

    Returns "yaml" if the text starts with a "---" document marker, "json"
    if it starts with "{", and None otherwise.
    """
    head = text.lstrip()
    if head.startswith("---"):
        return "yaml"
    if head.startswith("{"):
        return "json"
    return None


def load_content(
    source: "Optional[Union[Path, str, TextIO]]" = None,
    data: "Optional[str]" = None,
    format: "Optional[str]" = None,
) -> "Any":
    """Load content from yaml or json source.

    Arguments:
        source: File name or file-like object with data documentation to add.
        data: String containing the data documentation to add.
        format: Input format. One of: "yaml", "json".
            By default it is inferred from the file suffix of `source`, or,
            if no suffix is available, from the first characters of the
            content.

    Returns:
        Python representation of the content.

    Raises:
        TypeError: If neither `source` nor `data` is given, or if the format
            is not supported.
        ValueError: If `format` is not given and cannot be inferred.
    """
    if not source and not data:
        raise TypeError("Either `source` or `data` must be given.")

    if source and isinstance(source, (str, Path)):
        # Explicit encoding: do not depend on the platform default.
        with open(source, "rt", encoding="utf-8") as f:
            return load_content(source=f, format=format)

    if format is None:
        # Not every file-like object has a usable `name` (e.g. io.StringIO
        # has none, and files opened from a descriptor have an int).
        name = getattr(source, "name", None) if source else None
        suffix = Path(name).suffix if isinstance(name, (str, Path)) else ""
        if suffix:
            format = suffix
        else:
            if source:
                # No suffix to go by: read the stream so we can sniff it.
                data = source.read()
                source = None
            format = _infer_format(data)

    if format is None:
        raise ValueError("Format cannot be inferred. Use `format` argument.")

    format = format.lstrip(".").lower()
    if format in ("yaml", "yml"):
        # Imported lazily so that this module can be imported, and JSON
        # input loaded, without PyYAML being importable.
        import yaml

        return yaml.safe_load(source if source else io.StringIO(data))
    if format == "json":
        return json.load(source) if source else json.loads(data)
    raise TypeError(f"Unsupported format: {format}")


def add_resource(
    source: "Optional[Union[Path, str, TextIO]]" = None,
    data: "Optional[Union[dict, str]]" = None,
    format: "Optional[str]" = None,
    graph: "Optional[Union[rdflib.Graph, Any]]" = None,
) -> "Union[rdflib.Graph, Any]":
    """Add documentation of data resource(s) to triplestore.

    Arguments:
        source: File name or file-like object with data documentation
            to add.
        data: Dict or string containing the data documentation to add.
            A dict is shallow-copied; the caller's dict is not modified.
        format: Input format. One of: "yaml", "json".
            By default it will be inferred from `source` or `data`.
        graph: The graph to add the documentation to.  It can be a
            rdflib.Graph object or any type that has a parse() method
            that supports json-ld.
            If not given, a new rdflib.Graph object will be created.

    Returns:
        The provided graph or a new rdflib.Graph object, if `graph` is
        None.

    Raises:
        TypeError: If the loaded content is not a dict.
    """
    if isinstance(data, dict):
        content = data.copy()
    else:
        content = load_content(source=source, data=data, format=format)

    if not isinstance(content, dict):
        raise TypeError("Expected input content to be a dict.")

    if "@context" not in content:
        content["@context"] = _DEFAULT_CONTEXT

    # Compare against None explicitly: a graph without triples has length
    # zero and is therefore falsy, but must still be used when supplied.
    if graph is None:
        import rdflib

        graph = rdflib.Graph()

    # Hand the document over as a JSON string instead of round-tripping it
    # through a scratch file in the current working directory.
    graph.parse(data=json.dumps(content), format="json-ld")
    return graph
# JSON-LD 1.1 document used as round-trip fixture.  "resources" is declared
# as a "@nest" keyword and "configuration" is typed as "@json".
conf2 = """
{
  "@context": {
    "@version": 1.1,

    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "dcterms": "http://purl.org/dc/terms/",
    "dcat": "http://www.w3.org/ns/dcat#",
    "oteio": "https://w3id.org/emmo/domain/oteio#",

    "resources": "@nest",
    "configuration": {
      "@id": "oteio:configuration",
      "@type": "@json"
    },
    "dataresource": {
      "@id": "oteio:dataresource",
      "@type": "oteio:DataResource",
      "@nest": "resources"
    },
    "downloadURL": "dcat:downloadURL",
    "mediaType": "dcat:mediaType",
    "license": "dcterms:license",

    "parse": "oteio:parse",
    "parserType": "oteio:parserType",
    "datamodel": "oteio:datamodel",

    "driver": "oteio:driver"
  },

  "resources": [
    {
      "@type": "oteio:Source",
      "@id": "http://example.com/ex/faithfull",
      "dataresource": {
        "downloadURL": "http://example.com/datasets/faithfull.csv",
        "mediaType": "application/csv",
        "license": "https://creativecommons.org/licenses/by/4.0/legalcode"
      },
      "parse": {
        "parserType": "application/vnd.dlite-parse",
        "datamodel": "http://onto-ns.com/meta/calm/0.1/Composition",
        "configuration": {
          "driver": "csv"
        }
      }
    }
  ]
}
"""

g = rdflib.Graph()
g.parse(data=conf2, format="json-ld")
print(g.serialize(format="turtle"))
print("------------------------------------------------")
print()
context_data = json.loads(conf2).get("@context")
context = Context(context_data)
graph_data = json.loads(
    g.serialize(format="json-ld", context_data=context_data, auto_compact=True)
).get("@graph")


def expand(item, nodes=None):
    """Return a copy of `item` with all references to blank nodes expanded.

    Arguments:
        item: Dict describing one node of a compacted JSON-LD graph.
        nodes: Mapping from "@id" to node dict, used to look up blank nodes
            (ids starting with "_:").  Defaults to the module-level `iris`
            index.

    Returns:
        A new dict where blank-node references are replaced by the content
        of the referenced node and "@value" wrappers are replaced by their
        plain value ("rdf:JSON" values are decoded with json.loads).
    """
    if nodes is None:
        nodes = iris
    expanded = {}
    for key, value in item.items():
        if key == "@id":
            if value.startswith("_:"):
                # Inline the referenced blank node, dropping its own id.
                target = nodes[value].copy()
                target.pop("@id")
                expanded.update(expand(target, nodes))
            else:
                expanded[key] = value
        elif isinstance(value, dict):
            if "@value" in value:
                if value.get("@type") == "rdf:JSON":
                    expanded[key] = json.loads(value["@value"])
                else:
                    expanded[key] = value["@value"]
            else:
                expanded[key] = expand(value, nodes)
        else:
            expanded[key] = value
    return expanded


def from_rdf(graph, context_data):
    """Convert `graph` back to a dict with a top-level "resources" list.

    Arguments:
        graph: Graph to serialize; must support
            serialize(format="json-ld", context_data=..., auto_compact=True).
        context_data: JSON-LD context used to compact the serialization.

    Returns:
        Dict with the key "resources" holding one expanded dict per
        non-blank node in the serialized "@graph".
    """
    # Serialize the graph that was passed in, not the module-level `g`.
    nodes_list = json.loads(
        graph.serialize(
            format="json-ld", context_data=context_data, auto_compact=True
        )
    ).get("@graph")
    nodes = {d["@id"]: d for d in nodes_list if "@id" in d}
    resources = [
        expand(d, nodes)
        for d in nodes_list
        if "@id" in d and not d["@id"].startswith("_:")
    ]
    return {"resources": resources}


iris = {d["@id"]: d for d in graph_data if "@id" in d}
assert "http://example.com/ex/faithfull" in iris
print(json.dumps(from_rdf(g, context_data), indent=2))


# The property path (<>|!<>) matches any predicate: it reads "the empty IRI
# or anything that is not the empty IRI".
# NOTE(review): the prefix IRI below was missing in the original query; it is
# assumed to be the namespace of the fixture's "@id" -- confirm.
res = g.query(
    """
PREFIX ex: <http://example.com/ex/>
CONSTRUCT { ?s ?p ?o }
WHERE {
  ex:faithfull (<>|!<>) ?s .
  ?s ?p ?o .
}
"""
)


graph = rdf.add_resource(staticdir / "resources.yaml")