Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serialise datamodels to RDF based on JSON-LD context #471

Draft
wants to merge 13 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/api_reference/utils/rdf.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# rdf

::: oteapi.utils.rdf
113 changes: 113 additions & 0 deletions oteapi/utils/rdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""Utility functions for representing instances of pydantic models as rdf.

This module uses JSON-LD with a shared context on https://w3id.org/emmo/domain/oteio/context
"""

import io
import json
from pathlib import Path
from typing import TYPE_CHECKING

import rdflib
import yaml

if TYPE_CHECKING: # pragma: no cover
from typing import Any, Optional, TextIO, Union

# import tripper


def load_content(
    source: "Optional[Union[Path, str, TextIO]]" = None,
    data: "Optional[str]" = None,
    format: "Optional[str]" = None,
) -> "Any":
    """Load content from a YAML or JSON source.

    Arguments:
        source: File name or file-like object with data documentation to add.
        data: String containing the data documentation to add.
        format: Input format. One of: "yaml", "json".
            By default it will be inferred from `source` or `data`.

    Returns:
        Python representation of the content.

    Raises:
        TypeError: If neither `source` nor `data` is given, or if `format`
            is unsupported.
        ValueError: If the format cannot be inferred.
    """
    if not source and not data:
        raise TypeError("Either `source` or `data` must be given.")

    if source and isinstance(source, (str, Path)):
        # Open named files ourselves and recurse with the file object.
        with open(source, "rt", encoding="utf8") as f:
            return load_content(source=f, format=format)

    if format is None:
        if source:
            # Objects returned by open() expose the file name, but
            # in-memory streams (e.g. io.StringIO) may not - fall through
            # to the "cannot be inferred" error instead of crashing.
            name = getattr(source, "name", "")
            format = Path(name).suffix if name else None
        elif data.lstrip().startswith("---"):
            format = "yaml"
        elif data.lstrip().startswith("{"):
            format = "json"

    if format is None:
        raise ValueError("Format cannot be inferred. Use `format` argument.")

    format = format.lstrip(".").lower()
    if format in ("yaml", "yml"):
        if not source:
            source = io.StringIO(data)
        content = yaml.safe_load(source)
    elif format == "json":
        # NOTE: the previous `format in ("json")` (missing comma) tested
        # substring membership in the string "json" and wrongly accepted
        # e.g. "js" or "on".
        content = json.load(source) if source else json.loads(data)
    else:
        raise TypeError(f"Unsupported format: {format}")

    return content


def add_resource(
    source: "Optional[Union[Path, str, TextIO]]" = None,
    data: "Optional[Union[dict, str]]" = None,
    format: "Optional[str]" = None,
    graph: "Optional[Union[rdflib.Graph, Any]]" = None,
) -> "Union[rdflib.Graph, Any]":
    """Add documentation of data resource(s) to triplestore.

    Arguments:

        source: File name or file-like object with data documentation
            to add.
        data: Dict or string containing the data documentation to add.
        format: Input format. One of: "yaml", "json".
            By default it will be inferred from `source` or `data`.
        graph: The graph to add the documentation to. It can be a
            rdflib.Graph object or any type that has a parse() method
            that supports json-ld.
            If not given, a new rdflib.Graph object will be created.

    Returns:
        The provided graph or a new rdflib.Graph object, if `graph` is
        None.

    Raises:
        TypeError: If the loaded content is not a dict.
    """
    if isinstance(data, dict):
        # Copy so that the default-context injection below does not
        # mutate the caller's dict.
        content = data.copy()
    else:
        content = load_content(source=source, data=data, format=format)

    if not isinstance(content, dict):
        raise TypeError("Expected input content to be a dict.")

    # Fall back to the shared OTEIO context when the document does not
    # bring its own.
    if "@context" not in content:
        content["@context"] = "https://w3id.org/emmo/domain/oteio/context"

    # An rdflib.Graph with zero triples is falsy, so `if not graph:`
    # would silently discard a caller-supplied empty graph - test for
    # None explicitly.
    if graph is None:
        graph = rdflib.Graph()

    # Parse the JSON-LD in memory instead of round-tripping through the
    # "xxx.json" debug file that the previous version left in the CWD.
    graph.parse(data=json.dumps(content), format="json-ld")
    return graph
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ dependencies = [
"pydantic~=2.7",
"pydantic-settings~=2.2",
"typing-extensions~=4.11; python_version < '3.10'",
"rdflib>=6.3",

# Strategy dependencies
"celery>=5.3.5,<6",
Expand Down
237 changes: 237 additions & 0 deletions tests/models/test_jsonld.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
import json
from pathlib import Path

import rdflib
from rdflib.plugins.shared.jsonld.context import Context

from oteapi.utils import rdf

# Directory layout: this file lives in tests/models/; static test
# fixtures (e.g. resources.yaml) live in tests/static/.
thisdir = Path(__file__).resolve().parent
testdir = thisdir.parent
staticdir = testdir / "static"

# s = """
# {
# "@context": {
# "@vocab": "http://xmlns.com/foaf/0.1/",
# "knows": {"@type": "@id"}
# },
# "@id": "http://manu.sporny.org/about#manu",
# "@type": "Person",
# "name": "Manu Sporny",
# "knows": {
# "@id": "https://greggkellogg.net/foaf#me",
# "@type": "Person",
# "name": "Gregg Kellogg"
# }
# }
# """
# g = rdflib.Graph()
# g.parse(data=s, format="json-ld")
# #print(g.serialize(format="turtle"))
# #print("------------------------------------------------")
# #print()
#
#
# conf = """
# {
# "@context": {
# "oteio": "https://w3id.org/emmo/domain/oteio#",
# "dcat": "http://www.w3.org/ns/dcat#",
# "dcterms": "http://purl.org/dc/terms/",
#
# "downloadURL": "dcat:downloadURL",
# "mediaType": "dcat:mediaType",
# "license": "dcterms:license",
# "driver": "oteio:driver",
# "configuration": "oteio:configuration",
# "dataresource": "oteio:dataresource"
# },
# "http://example.com/ex/faithfull": {
# "@type": "oteio:Source",
# "@id": "http://example.com/ex/faithfull",
# "dataresource": {
# "downloadURL": "http://example.com/datasets/faithfull.csv",
# "mediaType": "application/csv",
# "license": "https://creativecommons.org/licenses/by/4.0/legalcode",
# "configuration": {
# "driver": "csv"
# }
# }
# }
# }
# """
# # "http://example.com/ex/faithfull": {
# g = rdflib.Graph()
# #g.bind("ex", "http://example.com/ex/")
# g.parse(data=conf, format="json-ld")
# #print(g.serialize(format="turtle"))
# #print("------------------------------------------------")
# #print()


conf2 = """
{
"@context": {
"@version": 1.1,

"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"dcterms": "http://purl.org/dc/terms/",
"dcat": "http://www.w3.org/ns/dcat#",
"oteio": "https://w3id.org/emmo/domain/oteio#",

"resources": "@nest",
"configuration": {
"@id": "oteio:configuration",
"@type": "@json"
},
"dataresource": {
"@id": "oteio:dataresource",
"@type": "oteio:DataResource",
"@nest": "resources"
},
"downloadURL": "dcat:downloadURL",
"mediaType": "dcat:mediaType",
"license": "dcterms:license",

"parse": "oteio:parse",
"parserType": "oteio:parserType",
"datamodel": "oteio:datamodel",

"driver": "oteio:driver"
},

"resources": [
{
"@type": "oteio:Source",
"@id": "http://example.com/ex/faithfull",
"dataresource": {
"downloadURL": "http://example.com/datasets/faithfull.csv",
"mediaType": "application/csv",
"license": "https://creativecommons.org/licenses/by/4.0/legalcode"
},
"parse": {
"parserType": "application/vnd.dlite-parse",
"datamodel": "http://onto-ns.com/meta/calm/0.1/Composition",
"configuration": {
"driver": "csv"
}
}
}
]
}
"""
# "http://example.com/ex/faithfull": {
g = rdflib.Graph()
# g.bind("ex", "http://example.com/ex/")
# print(json.loads(conf2))
g.parse(data=conf2, format="json-ld")
print(g.serialize(format="turtle"))
print("------------------------------------------------")
print()
context_data = json.loads(conf2).get("@context")
context = Context(context_data)
graph_data = json.loads(
g.serialize(format="json-ld", context_data=context_data, auto_compact=True)
).get("@graph")
# print(json.dumps(graph_data, indent=2))
# print("------------------------------------------------")
# print()


def expand(item):
    """Return a copy of `item` in which every reference to a blank node
    is replaced by the (recursively expanded) node itself.

    Blank-node references are looked up in the module-level `iris` index.
    Values carrying an rdf:JSON datatype are decoded back to Python
    objects; other ``@value`` wrappers collapse to their literal value.
    """
    result = {}
    for key, value in item.items():
        if key == "@id":
            if value.startswith("_:"):
                # Inline the referenced blank node, dropping its
                # internal identifier.
                node = dict(iris[value])
                node.pop("@id")
                result.update(expand(node))
            else:
                result[key] = value
        elif isinstance(value, dict):
            if "@value" not in value:
                result[key] = expand(value)
            elif value.get("@type") == "rdf:JSON":
                # JSON literals are stored as strings; decode them.
                result[key] = json.loads(value["@value"])
            else:
                result[key] = value["@value"]
        else:
            result[key] = value
    return result


def from_rdf(graph, context_data):
    """Reconstruct a compacted resources dict from `graph`.

    Serialises `graph` as compacted JSON-LD using `context_data` and
    returns a dict whose "resources" list holds one expanded dict per
    non-blank top-level node.

    NOTE(review): expand() resolves blank nodes through the module-level
    `iris` index, which is built from the module-level graph - confirm
    this still holds when called with a different `graph`.
    """
    # BUG FIX: the original serialised the module-level graph `g`,
    # silently ignoring the `graph` argument.
    graph_data = json.loads(
        graph.serialize(format="json-ld", context_data=context_data, auto_compact=True)
    ).get("@graph")
    resources = [
        expand(d) for d in graph_data if "@id" in d and not d["@id"].startswith("_:")
    ]
    return {"resources": resources}


# Index the compacted node objects by @id so that expand() can resolve
# blank-node references back into nested dicts.
iris = {d["@id"]: d for d in graph_data if "@id" in d}
assert "http://example.com/ex/faithfull" in iris
# resources = [
#     expand(d) for d in graph_data
#     if "@id" in d and not d["@id"].startswith("_:")
# ]
#
# json_data = {
#     "resources": resources,
# }
# print(json.dumps(json_data, indent=2))
print(json.dumps(from_rdf(g, context_data), indent=2))


# PREFIX ex: <http://example.com/ex/>
res = g.query(
"""
PREFIX ex: <http://example.com/ex/>
CONSTRUCT { ?s ?p ?o }
WHERE {
ex:faithfull (<>|!<>) ?s .
?s ?p ?o .
}
"""
)


# Earlier experiment: transitive closure from :A via the `(<>|!<>)*`
# any-predicate path, kept for reference.
# data = """
# @prefix : <urn:ex:> .
#
# :A :p :B, :C .
# :B :q :D .
# :C :r :E .
#
# :F :s :G .
# :G :t :H .
# """
# query = """
# PREFIX x: <urn:ex:>
# PREFIX : <urn:ex:>
#
# CONSTRUCT {
#     ?s ?p ?o
# }
# WHERE {
#     :A (<>|!<>)* ?s .
#     ?s ?p ?o .
# }
# """
# graph = rdflib.Graph()
# graph.parse(data=data)
# res = graph.query(query)


# with open(staticdir / "resources.yaml", "rt", encoding="utf8") as f:
#     data = yaml.safe_load(f)

# Exercise the helper under test: load the YAML resource documentation
# from the static fixtures into a fresh rdflib graph.
graph = rdf.add_resource(staticdir / "resources.yaml")
Loading