Skip to content

Commit

Permalink
Merge pull request #118 from MITLibraries/GDT-68-update-external-url
Browse files (browse the repository at this point in the history)
Expect single string URL for Aardvark dct_references_s
  • Loading branch information
ghukill authored Feb 7, 2024
2 parents bead8cf + c8fdc03 commit 91980e8
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 11 deletions.
2 changes: 1 addition & 1 deletion tests/fixtures/aardvark/aardvark_record_all_fields.jsonl
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"id": "mit:123", "dcat_bbox": "ENVELOPE(-111.1, -104.0, 45.0, 40.9)", "dcat_keyword_sm": ["Country"], "dcat_theme_sm": ["Political boundaries"], "dct_accessRights_s": "Access note", "dct_alternative_sm": ["Alternate title"], "dct_creator_sm": ["Smith, Jane", "Smith, John"], "dct_description_sm": ["A description"], "dct_format_s": "Shapefile", "dct_identifier_sm": ["abc123"], "dct_issued_s": "2003-10-23", "dct_language_sm": ["eng"], "dct_license_sm": ["http://license.license", "http://another_license.another_license"], "dct_publisher_sm": ["ML InfoMap (Firm)"], "dct_references_s": "{\"http://schema.org/downloadUrl\": [{\"label\": \"Source Metadata\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.source.fgdc.xml\"}, {\"label\": \"Aardvark Metadata\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.normalized.aardvark.json\"}, {\"label\": \"Data\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.zip\"}], \"http://schema.org/url\": [{\"label\": \"Website\", \"url\": \"https://search.libraries.mit.edu/record/gismit:GISPORTAL_GISOWNER01_BOSTONWATER95\"}]}", "dct_rights_sm": ["Some person has the rights"], "dct_rightsHolder_sm": ["The person with the rights", "Another person with the rights"], "dct_spatial_sm": ["Some city, Some country"], "dct_subject_sm": ["Geography", "Earth"], "dct_temporal_sm": ["1943", "1979"], "dct_title_s": "Test title 1", "gbl_dateRange_drsim": ["[1943 TO 1946]"], "gbl_displayNote_sm": ["Danger: This text will be displayed in a red box","Info: This text will be displayed in a blue box","Tip: This text will be displayed in a green box","Warning: This text will be displayed in a yellow box","This is text without a tag and it will be assigned default 'note' style"], "gbl_indexYear_im": [1943,1944,1945,1946], "gbl_resourceClass_sm": ["Dataset"], "gbl_resourceType_sm": ["Vector data"], "gbl_suppressed_b": false, "locn_geometry": "ENVELOPE(-111.1, -104.0, 45.0, 40.9)", "schema_provider_s": "MIT"}
{"id": "mit:123", "dcat_bbox": "ENVELOPE(-111.1, -104.0, 45.0, 40.9)", "dcat_keyword_sm": ["Country"], "dcat_theme_sm": ["Political boundaries"], "dct_accessRights_s": "Access note", "dct_alternative_sm": ["Alternate title"], "dct_creator_sm": ["Smith, Jane", "Smith, John"], "dct_description_sm": ["A description"], "dct_format_s": "Shapefile", "dct_identifier_sm": ["abc123"], "dct_issued_s": "2003-10-23", "dct_language_sm": ["eng"], "dct_license_sm": ["http://license.license", "http://another_license.another_license"], "dct_publisher_sm": ["ML InfoMap (Firm)"], "dct_references_s": "{\"http://schema.org/downloadUrl\": [{\"label\": \"Source Metadata\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.source.fgdc.xml\"}, {\"label\": \"Aardvark Metadata\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.normalized.aardvark.json\"}, {\"label\": \"Data\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.zip\"}], \"http://schema.org/url\": \"https://geodata.libraries.mit.edu/record/gismit:GISPORTAL_GISOWNER01_BOSTONWATER95\"}", "dct_rights_sm": ["Some person has the rights"], "dct_rightsHolder_sm": ["The person with the rights", "Another person with the rights"], "dct_spatial_sm": ["Some city, Some country"], "dct_subject_sm": ["Geography", "Earth"], "dct_temporal_sm": ["1943", "1979"], "dct_title_s": "Test title 1", "gbl_dateRange_drsim": ["[1943 TO 1946]"], "gbl_displayNote_sm": ["Danger: This text will be displayed in a red box","Info: This text will be displayed in a blue box","Tip: This text will be displayed in a green box","Warning: This text will be displayed in a yellow box","This is text without a tag and it will be assigned default 'note' style"], "gbl_indexYear_im": [1943,1944,1945,1946], "gbl_resourceClass_sm": ["Dataset"], "gbl_resourceType_sm": ["Vector data"], "gbl_suppressed_b": false, "locn_geometry": "ENVELOPE(-111.1, -104.0, 45.0, 40.9)", "schema_provider_s": "MIT"}
2 changes: 1 addition & 1 deletion tests/sources/json/test_aardvark.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def test_aardvark_get_links_success(aardvark_record_all_fields):
text="Data",
),
timdex.Link(
url="https://search.libraries.mit.edu/record/gismit"
url="https://geodata.libraries.mit.edu/record/gismit"
":GISPORTAL_GISOWNER01_BOSTONWATER95",
kind="Website",
text="Website",
Expand Down
19 changes: 10 additions & 9 deletions transmogrifier/sources/json/aardvark.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,14 @@ def get_identifiers(source_record: dict) -> list[timdex.Identifier]:

@staticmethod
def get_links(source_record: dict, source_record_id: str) -> list[timdex.Link]:
"""Get values from source record for TIMDEX links field."""
"""Get values from source record for TIMDEX links field.
The dct_references_s is a JSON string following a particular format defined here:
https://opengeometadata.org/ogm-aardvark/#references. Keys in the parsed JSON
object define what kind of URL it is. This is a flat mapping of namespace:url,
except in the case of 'http://schema.org/downloadUrl' which will be an array of
complex objects.
"""
links = []
links_string = source_record["dct_references_s"]
try:
Expand All @@ -287,14 +294,8 @@ def get_links(source_record: dict, source_record_id: str) -> list[timdex.Link]:
for link in links_object.get("http://schema.org/downloadUrl", [])
]
)
links.extend(
[
timdex.Link(
url=link.get("url"), kind="Website", text=link.get("label")
)
for link in links_object.get("http://schema.org/url", [])
]
)
if schema_url := links_object.get("http://schema.org/url"):
links.append(timdex.Link(url=schema_url, kind="Website", text="Website"))
except ValueError:
message = (
f"Record ID '{source_record_id}': Unable to parse "
Expand Down

0 comments on commit 91980e8

Please sign in to comment.