diff --git a/examples/bind_prefix_cc.py b/examples/bind_prefix_cc.py
new file mode 100644
index 000000000..f0dc92c05
--- /dev/null
+++ b/examples/bind_prefix_cc.py
@@ -0,0 +1,18 @@
+"""
+Prefix.cc is a community-curated prefix map. By using `bind_namespaces="cc"`,
+you can set a namespace manager or graph to dynamically load prefixes from
+this resource.
+"""
+
+import rdflib
+
+graph = rdflib.Graph(bind_namespaces="cc")
+
+# The Gene Ontology is a biomedical ontology describing
+# biological processes, cellular locations, and cellular components.
+# It is typically abbreviated with the prefix "go" and uses PURLs
+# issued by the Open Biological and Biomedical Ontologies Foundry.
+prefix_map = {prefix: str(ns) for prefix, ns in graph.namespaces()}
+assert "go" in prefix_map
+assert prefix_map["go"] == "http://purl.obolibrary.org/obo/GO_"
+assert graph.qname("http://purl.obolibrary.org/obo/GO_0032571") == "go:0032571"
diff --git a/rdflib/_type_checking.py b/rdflib/_type_checking.py
index ac6e2b8b8..f09b70cc2 100644
--- a/rdflib/_type_checking.py
+++ b/rdflib/_type_checking.py
@@ -27,5 +27,5 @@ else:
     from typing_extensions import Literal as PyLiteral
 
 
-_NamespaceSetString = PyLiteral["core", "rdflib", "none"]
+_NamespaceSetString = PyLiteral["core", "rdflib", "none", "cc"]
 _MulPathMod = PyLiteral["*", "+", "?"]  # noqa: F722
diff --git a/rdflib/namespace/__init__.py b/rdflib/namespace/__init__.py
index c88fdedd4..701872485 100644
--- a/rdflib/namespace/__init__.py
+++ b/rdflib/namespace/__init__.py
@@ -7,6 +7,7 @@
 from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Union
 from unicodedata import category
 from urllib.parse import urldefrag, urljoin
+from urllib.request import urlopen
 
 from rdflib.term import URIRef, Variable, _is_valid_uri
 
@@ -372,7 +373,6 @@ class NamespaceManager(object):
         * note this is NOT default behaviour
     * cc:
         * using prefix bindings from prefix.cc which is a online prefixes database
-        * not implemented yet - this is aspirational
 
     .. attention::
 
@@ -427,11 +427,14 @@ def __init__(
             for prefix, ns in _NAMESPACE_PREFIXES_CORE.items():
                 self.bind(prefix, ns)
         elif bind_namespaces == "cc":
+            for prefix, ns in _NAMESPACE_PREFIXES_RDFLIB.items():
+                self.bind(prefix, ns)
+            for prefix, ns in _NAMESPACE_PREFIXES_CORE.items():
+                self.bind(prefix, ns)
             # bind any prefix that can be found with lookups to prefix.cc
-            # first bind core and rdflib ones
-            # work out remainder - namespaces without prefixes
-            # only look those ones up
-            raise NotImplementedError("Haven't got to this option yet")
+            for prefix, ns in _get_prefix_cc().items():
+                # note that prefixes are lowercase-only in prefix.cc
+                self.bind(prefix, ns)
         elif bind_namespaces == "core":
             # bind a few core RDF namespaces - default
             for prefix, ns in _NAMESPACE_PREFIXES_CORE.items():
@@ -728,6 +731,18 @@ def absolutize(self, uri: str, defrag: int = 1) -> URIRef:
         return URIRef(result)
 
 
+def _get_prefix_cc():
+    """Fetch the prefix-to-namespace map published by Prefix.cc.
+
+    Downloads ``https://prefix.cc/context.jsonld`` and returns the value
+    of its top-level ``@context`` key.
+    """
+    # The context manager closes the HTTP response once it has been read.
+    with urlopen("https://prefix.cc/context.jsonld") as response:
+        context = json.loads(response.read())
+    return context["@context"]
+
+
 # From: http://www.w3.org/TR/REC-xml#NT-CombiningChar
 #
 # * Name start characters must have one of the categories Ll, Lu, Lo,
diff --git a/test/test_namespace/test_namespacemanager.py b/test/test_namespace/test_namespacemanager.py
index 20cb9594f..20ecae68d 100644
--- a/test/test_namespace/test_namespacemanager.py
+++ b/test/test_namespace/test_namespacemanager.py
@@ -181,6 +181,22 @@ def test_graph_bind_namespaces(
         assert namespaces is None
 
 
+def test_graph_bind_cc():
+    """Test binding Prefix.cc.
+
+    Since prefix.cc is an inherently dynamic resource, checking for exact
+    equivalence is not applicable; only expected prefixes are checked.
+    """
+    graph = Graph(bind_namespaces="cc")
+    bound_prefixes = {prefix for prefix, _ in graph.namespaces()}
+    for expected_prefixes in [
+        _NAMESPACE_PREFIXES_CORE,
+        _NAMESPACE_PREFIXES_RDFLIB,
+        {"go", "atcc"},  # represent some prefixes in Prefix.cc
+    ]:
+        assert all(prefix in bound_prefixes for prefix in expected_prefixes)
+
+
 @pytest.mark.parametrize(
     ["selector", "expected_result"],
     [