Skip to content

Commit

Permalink
jsonld - Improve handling of URNs in norm_url (#2892)
Browse files Browse the repository at this point in the history
* jsonld - Improve handling of URNs in norm_url

* Fix import package

* Fix formatting with black
  • Loading branch information
avillar authored Aug 26, 2024
1 parent 1618b3f commit 5baa8d5
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 7 deletions.
29 changes: 22 additions & 7 deletions rdflib/plugins/shared/jsonld/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,13 +223,28 @@ def norm_url(base: str, url: str) -> str:
"""
if "://" in url:
return url
parts = urlsplit(urljoin(base, url))
path = normpath(parts[2])
if sep != "/":
path = "/".join(path.split(sep))
if parts[2].endswith("/") and not path.endswith("/"):
path += "/"
result = urlunsplit(parts[0:2] + (path,) + parts[3:])

# Fix for URNs
parsed_base = urlsplit(base)
parsed_url = urlsplit(url)
if parsed_url.scheme:
# Assume full URL
return url
if parsed_base.scheme in ("urn", "urn-x"):
# No scheme -> assume relative and join paths
base_path_parts = parsed_base.path.split("/", 1)
base_path = "/" + (base_path_parts[1] if len(base_path_parts) > 1 else "")
joined_path = urljoin(base_path, parsed_url.path)
fragment = f"#{parsed_url.fragment}" if parsed_url.fragment else ""
result = f"{parsed_base.scheme}:{base_path_parts[0]}{joined_path}{fragment}"
else:
parts = urlsplit(urljoin(base, url))
path = normpath(parts[2])
if sep != "/":
path = "/".join(path.split(sep))
if parts[2].endswith("/") and not path.endswith("/"):
path += "/"
result = urlunsplit(parts[0:2] + (path,) + parts[3:])
if url.endswith("#") and not result.endswith("#"):
result += "#"
return result
Expand Down
21 changes: 21 additions & 0 deletions test/jsonld/test_norm_urn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from rdflib.plugins.shared.jsonld.util import norm_url


def test_norm_urn():
    """Verify ``norm_url`` results for references resolved against URN bases.

    Every entry in ``cases`` is a ``(base, url, expected)`` triple; the test
    asserts that ``norm_url(base, url)`` equals ``expected`` for each one,
    in order, stopping at the first mismatch.
    """
    cases = (
        # Path references against a base with no path component.
        ("urn:ns:test", "/one", "urn:ns:test/one"),
        # Relative reference: a trailing slash on the base keeps the last
        # segment, no trailing slash replaces it.
        ("urn:ns:test/path/", "two", "urn:ns:test/path/two"),
        ("urn:ns:test/path", "two", "urn:ns:test/two"),
        ("urn:ns:test", "three", "urn:ns:test/three"),
        # A fragment marker on the base does not survive the join.
        ("urn:ns:test/path#", "four", "urn:ns:test/four"),
        # Parent-directory and absolute-path references.
        ("urn:ns:test/path1/path2/", "../path3", "urn:ns:test/path1/path3"),
        ("urn:ns:test/path1/path2/", "/path3", "urn:ns:test/path3"),
        # References that carry their own scheme come back unchanged.
        ("urn:ns:test/path1/path2/", "http://example.com", "http://example.com"),
        ("urn:ns:test/path1/path2/", "urn:another:test/path", "urn:another:test/path"),
        # Fragment-only references are appended to the base path.
        ("urn:ns:test/path", "#four", "urn:ns:test/path#four"),
        ("urn:ns:test/path/", "#four", "urn:ns:test/path/#four"),
    )
    for base, url, expected in cases:
        assert norm_url(base, url) == expected

0 comments on commit 5baa8d5

Please sign in to comment.