Skip to content

Commit

Permalink
Field method refactor for Ead transform
Browse files Browse the repository at this point in the history
Why these changes are being introduced:
* These updates are required to implement the architecture described
in the following ADR: https://github.com/MITLibraries/transmogrifier/blob/main/docs/adrs/0005-field-methods.md

How this addresses that need:
* Create field method for retrieving <archdesc> and <did> elements
  * Raise SkippedRecordEvent when either <archdesc> or <did> is missing
* Update tests for Ead transform
  * Add new tests (success, blank, missing) for new field methods
  * Remove redundant tests

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/TIMX-287
  • Loading branch information
jonavellecuerdo committed May 28, 2024
1 parent 1ad5173 commit ce26106
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 45 deletions.
63 changes: 42 additions & 21 deletions tests/sources/xml/test_ead.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from bs4 import BeautifulSoup

import transmogrifier.models as timdex
from transmogrifier.exceptions import SkippedRecordEvent
from transmogrifier.sources.xml.ead import Ead


Expand Down Expand Up @@ -266,32 +267,52 @@ def test_ead_record_all_fields_transform_correctly():
)


def test_ead_record_with_missing_archdesc_logs_error(caplog):
ead_xml_records = Ead.parse_source_file(
"tests/fixtures/ead/ead_record_missing_archdesc.xml"
def test_ead_get_collection_description_success():
source_record = create_ead_source_record_stub(
metadata_insert=(
"""
<archdesc level="collection">
<did>
</did>
</archdesc>
"""
)
)
collection_description, collection_description_did = Ead.get_collection_description(
source_record
)
output_records = Ead("aspace", ead_xml_records)
assert len(list(output_records)) == 0
assert output_records.processed_record_count == 1
assert (
"transmogrifier.sources.xml.ead",
logging.ERROR,
"Record ID repositories/2/resources/4 is missing archdesc element",
) in caplog.record_tuples
collection_description.name == "archdesc"
and collection_description["level"] == "collection"
)
assert collection_description_did.name == "did"


def test_ead_record_with_missing_archdesc_did_logs_error(caplog):
ead_xml_records = Ead.parse_source_file(
"tests/fixtures/ead/ead_record_missing_archdesc_did.xml"
def test_ead_get_collection_description_raises_skipped_record_event_if_archdesc_missing():
    """Expect SkippedRecordEvent for a stub record built with no metadata insert."""
    # pytest.raises treats `match` as a regular expression searched against the
    # exception text.
    expected_message = (
        "Record skipped because key information is missing: "
        '<archdesc level="collection">.'
    )
    record_without_archdesc = create_ead_source_record_stub()
    with pytest.raises(SkippedRecordEvent, match=expected_message):
        Ead.get_collection_description(record_without_archdesc)


def test_ead_get_collection_description_raises_skipped_record_event_if_did_missing():
source_record = create_ead_source_record_stub(
metadata_insert=(
"""
<archdesc level="collection"></archdesc>
"""
)
)
output_records = Ead("aspace", ead_xml_records)
assert len(list(output_records)) == 0
assert output_records.processed_record_count == 1
assert (
"transmogrifier.sources.xml.ead",
logging.ERROR,
"Record ID repositories/2/resources/3 is missing archdesc > did element",
) in caplog.record_tuples
with pytest.raises(
SkippedRecordEvent,
match=("Record skipped because key information is missing: <did>"),
):
Ead.get_collection_description(source_record)


def test_ead_record_with_attribute_and_subfield_variations_transforms_correctly():
Expand Down
50 changes: 26 additions & 24 deletions transmogrifier/sources/xml/ead.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import transmogrifier.models as timdex
from transmogrifier.config import load_external_config
from transmogrifier.exceptions import SkippedRecordEvent
from transmogrifier.helpers import validate_date, validate_date_range
from transmogrifier.sources.xmltransformer import XMLTransformer

Expand All @@ -18,36 +19,20 @@ class Ead(XMLTransformer):
"""EAD transformer."""

def get_optional_fields(self, source_record: Tag) -> dict | None:
"""
Retrieve optional TIMDEX fields from an EAD XML record.
Overrides metaclass get_optional_fields() method.
"""_summary_
Args:
xml: A BeautifulSoup Tag representing a single EAD XML record.
source_record (Tag): _description_
Returns:
dict | None: _description_
"""
fields: dict = {}

source_record_id = self.get_source_record_id(source_record)

if collection_description := source_record.metadata.find(
"archdesc", level="collection"
):
pass
else:
message = f"Record ID {self.get_source_record_id(source_record)} is missing archdesc element"
logger.error(message)
return None

if collection_description_did := collection_description.did:
pass
else:
message = (
f"Record ID {self.get_source_record_id(source_record)} is missing archdesc > "
"did element"
)
logger.error(message)
return None
collection_description, collection_description_did = (
self.get_collection_description(source_record)
)

control_access_elements = collection_description.find_all(
"controlaccess", recursive=False
Expand Down Expand Up @@ -361,6 +346,23 @@ def generate_name_identifier_url(cls, name_element: Tag) -> list | None:
return [base_url + identifier]
return None

@classmethod
def get_collection_description(cls, source_record: Tag) -> tuple[Tag, Tag]:
    """Retrieve the collection-level <archdesc> element and its <did> child.

    Args:
        source_record: A BeautifulSoup Tag representing a single EAD XML record.

    Returns:
        A two-item tuple: the <archdesc level="collection"> element found under
        the record's metadata, followed by that element's <did> element.

    Raises:
        SkippedRecordEvent: If no <archdesc level="collection"> element is found,
            or if the element found has no <did>.
    """
    collection_description = source_record.metadata.find(
        "archdesc", level="collection"
    )
    if not collection_description:
        message = (
            "Record skipped because key information is missing: "
            '<archdesc level="collection">.'
        )
        raise SkippedRecordEvent(message)

    collection_description_did = collection_description.did
    if not collection_description_did:
        message = "Record skipped because key information is missing: <did>."
        raise SkippedRecordEvent(message)

    return collection_description, collection_description_did

@classmethod
def get_main_titles(cls, source_record: Tag) -> list[str]:
"""
Expand Down

0 comments on commit ce26106

Please sign in to comment.