diff --git a/tests/sources/xml/test_ead.py b/tests/sources/xml/test_ead.py
index e3593c8..7801b73 100644
--- a/tests/sources/xml/test_ead.py
+++ b/tests/sources/xml/test_ead.py
@@ -4,6 +4,7 @@
from bs4 import BeautifulSoup
import transmogrifier.models as timdex
+from transmogrifier.exceptions import SkippedRecordEvent
from transmogrifier.sources.xml.ead import Ead
@@ -266,32 +267,52 @@ def test_ead_record_all_fields_transform_correctly():
)
-def test_ead_record_with_missing_archdesc_logs_error(caplog):
- ead_xml_records = Ead.parse_source_file(
- "tests/fixtures/ead/ead_record_missing_archdesc.xml"
+def test_ead_get_collection_description_success():
+ source_record = create_ead_source_record_stub(
+ metadata_insert=(
+ """
+
+
+
+
+ """
+ )
+ )
+ collection_description, collection_description_did = Ead.get_collection_description(
+ source_record
)
- output_records = Ead("aspace", ead_xml_records)
- assert len(list(output_records)) == 0
- assert output_records.processed_record_count == 1
assert (
- "transmogrifier.sources.xml.ead",
- logging.ERROR,
- "Record ID repositories/2/resources/4 is missing archdesc element",
- ) in caplog.record_tuples
+ collection_description.name == "archdesc"
+ and collection_description["level"] == "collection"
+ )
+ assert collection_description_did.name == "did"
-def test_ead_record_with_missing_archdesc_did_logs_error(caplog):
- ead_xml_records = Ead.parse_source_file(
- "tests/fixtures/ead/ead_record_missing_archdesc_did.xml"
+def test_ead_get_collection_description_raises_skipped_record_event_if_archdesc_missing():
+ source_record = create_ead_source_record_stub()
+ with pytest.raises(
+ SkippedRecordEvent,
+ match=(
+ "Record skipped because key information is missing: "
+ '.'
+ ),
+ ):
+ Ead.get_collection_description(source_record)
+
+
+def test_ead_get_collection_description_raises_skipped_record_event_if_did_missing():
+ source_record = create_ead_source_record_stub(
+ metadata_insert=(
+ """
+
+ """
+ )
)
- output_records = Ead("aspace", ead_xml_records)
- assert len(list(output_records)) == 0
- assert output_records.processed_record_count == 1
- assert (
- "transmogrifier.sources.xml.ead",
- logging.ERROR,
- "Record ID repositories/2/resources/3 is missing archdesc > did element",
- ) in caplog.record_tuples
+ with pytest.raises(
+ SkippedRecordEvent,
+ match=("Record skipped because key information is missing: "),
+ ):
+ Ead.get_collection_description(source_record)
def test_ead_record_with_attribute_and_subfield_variations_transforms_correctly():
diff --git a/transmogrifier/sources/xml/ead.py b/transmogrifier/sources/xml/ead.py
index 9a5717a..822392b 100644
--- a/transmogrifier/sources/xml/ead.py
+++ b/transmogrifier/sources/xml/ead.py
@@ -5,6 +5,7 @@
import transmogrifier.models as timdex
from transmogrifier.config import load_external_config
+from transmogrifier.exceptions import SkippedRecordEvent
from transmogrifier.helpers import validate_date, validate_date_range
from transmogrifier.sources.xmltransformer import XMLTransformer
@@ -18,36 +19,20 @@ class Ead(XMLTransformer):
"""EAD transformer."""
def get_optional_fields(self, source_record: Tag) -> dict | None:
- """
- Retrieve optional TIMDEX fields from an EAD XML record.
-
- Overrides metaclass get_optional_fields() method.
+ """Retrieve optional TIMDEX fields from an EAD XML record.
Args:
- xml: A BeautifulSoup Tag representing a single EAD XML record.
+ source_record: A BeautifulSoup Tag representing a single EAD XML record.
+
+ Returns:
+ dict | None: The optional TIMDEX fields gathered from the record.
"""
fields: dict = {}
source_record_id = self.get_source_record_id(source_record)
-
- if collection_description := source_record.metadata.find(
- "archdesc", level="collection"
- ):
- pass
- else:
- message = f"Record ID {self.get_source_record_id(source_record)} is missing archdesc element"
- logger.error(message)
- return None
-
- if collection_description_did := collection_description.did:
- pass
- else:
- message = (
- f"Record ID {self.get_source_record_id(source_record)} is missing archdesc > "
- "did element"
- )
- logger.error(message)
- return None
+ collection_description, collection_description_did = (
+ self.get_collection_description(source_record)
+ )
control_access_elements = collection_description.find_all(
"controlaccess", recursive=False
@@ -361,6 +346,23 @@ def generate_name_identifier_url(cls, name_element: Tag) -> list | None:
return [base_url + identifier]
return None
+    @classmethod
+    def get_collection_description(cls, source_record: Tag) -> tuple[Tag, Tag]:
+        """Return the collection archdesc and its did, or raise SkippedRecordEvent."""
+        collection_description = source_record.metadata.find(
+            "archdesc", level="collection"
+        )
+        if not collection_description:
+            message = (
+                "Record skipped because key information is missing: "
+                '.'
+            )
+            raise SkippedRecordEvent(message)
+        if collection_description_did := collection_description.did:
+            return collection_description, collection_description_did
+        message = "Record skipped because key information is missing: ."
+        raise SkippedRecordEvent(message)
+
@classmethod
def get_main_titles(cls, source_record: Tag) -> list[str]:
"""