diff --git a/tests/sources/xml/test_ead.py b/tests/sources/xml/test_ead.py
index e3593c8..7801b73 100644
--- a/tests/sources/xml/test_ead.py
+++ b/tests/sources/xml/test_ead.py
@@ -4,6 +4,7 @@
 from bs4 import BeautifulSoup
 
 import transmogrifier.models as timdex
+from transmogrifier.exceptions import SkippedRecordEvent
 from transmogrifier.sources.xml.ead import Ead
 
 
@@ -266,32 +267,52 @@ def test_ead_record_all_fields_transform_correctly():
     )
 
 
-def test_ead_record_with_missing_archdesc_logs_error(caplog):
-    ead_xml_records = Ead.parse_source_file(
-        "tests/fixtures/ead/ead_record_missing_archdesc.xml"
+def test_ead_get_collection_description_success():
+    source_record = create_ead_source_record_stub(
+        metadata_insert=(
+            """
+
+
+
+
+            """
+        )
+    )
+    collection_description, collection_description_did = Ead.get_collection_description(
+        source_record
     )
-    output_records = Ead("aspace", ead_xml_records)
-    assert len(list(output_records)) == 0
-    assert output_records.processed_record_count == 1
     assert (
-        "transmogrifier.sources.xml.ead",
-        logging.ERROR,
-        "Record ID repositories/2/resources/4 is missing archdesc element",
-    ) in caplog.record_tuples
+        collection_description.name == "archdesc"
+        and collection_description["level"] == "collection"
+    )
+    assert collection_description_did.name == "did"
 
 
-def test_ead_record_with_missing_archdesc_did_logs_error(caplog):
-    ead_xml_records = Ead.parse_source_file(
-        "tests/fixtures/ead/ead_record_missing_archdesc_did.xml"
+def test_ead_get_collection_description_raises_skipped_record_event_if_archdesc_missing():
+    source_record = create_ead_source_record_stub()
+    with pytest.raises(
+        SkippedRecordEvent,
+        match=(
+            "Record skipped because key information is missing: "
+            '.'
+        ),
+    ):
+        Ead.get_collection_description(source_record)
+
+
+def test_ead_get_collection_description_raises_skipped_record_event_if_did_missing():
+    source_record = create_ead_source_record_stub(
+        metadata_insert=(
+            """
+
+            """
+        )
     )
-    output_records = Ead("aspace", ead_xml_records)
-    assert len(list(output_records)) == 0
-    assert output_records.processed_record_count == 1
-    assert (
-        "transmogrifier.sources.xml.ead",
-        logging.ERROR,
-        "Record ID repositories/2/resources/3 is missing archdesc > did element",
-    ) in caplog.record_tuples
+    with pytest.raises(
+        SkippedRecordEvent,
+        match=("Record skipped because key information is missing: "),
+    ):
+        Ead.get_collection_description(source_record)
 
 
 def test_ead_record_with_attribute_and_subfield_variations_transforms_correctly():
diff --git a/transmogrifier/sources/xml/ead.py b/transmogrifier/sources/xml/ead.py
index 9a5717a..822392b 100644
--- a/transmogrifier/sources/xml/ead.py
+++ b/transmogrifier/sources/xml/ead.py
@@ -5,6 +5,7 @@
 
 import transmogrifier.models as timdex
 from transmogrifier.config import load_external_config
+from transmogrifier.exceptions import SkippedRecordEvent
 from transmogrifier.helpers import validate_date, validate_date_range
 from transmogrifier.sources.xmltransformer import XMLTransformer
 
@@ -18,36 +19,20 @@ class Ead(XMLTransformer):
     """EAD transformer."""
 
     def get_optional_fields(self, source_record: Tag) -> dict | None:
-        """
-        Retrieve optional TIMDEX fields from an EAD XML record.
-
-        Overrides metaclass get_optional_fields() method.
+        """Retrieve optional TIMDEX fields from an EAD XML record.
 
         Args:
-            xml: A BeautifulSoup Tag representing a single EAD XML record.
+            source_record: A BeautifulSoup Tag representing a single EAD XML record.
+
+        Returns:
+            dict | None: The optional TIMDEX fields retrieved from the record.
         """
         fields: dict = {}
 
         source_record_id = self.get_source_record_id(source_record)
-
-        if collection_description := source_record.metadata.find(
-            "archdesc", level="collection"
-        ):
-            pass
-        else:
-            message = f"Record ID {self.get_source_record_id(source_record)} is missing archdesc element"
-            logger.error(message)
-            return None
-
-        if collection_description_did := collection_description.did:
-            pass
-        else:
-            message = (
-                f"Record ID {self.get_source_record_id(source_record)} is missing archdesc > "
-                "did element"
-            )
-            logger.error(message)
-            return None
+        collection_description, collection_description_did = (
+            self.get_collection_description(source_record)
+        )
 
         control_access_elements = collection_description.find_all(
             "controlaccess", recursive=False
@@ -361,6 +346,37 @@ def generate_name_identifier_url(cls, name_element: Tag) -> list | None:
             return [base_url + identifier]
         return None
 
+    @classmethod
+    def get_collection_description(cls, source_record: Tag) -> tuple[Tag, Tag]:
+        """Get the collection-level archdesc element and its did child.
+
+        Args:
+            source_record: A BeautifulSoup Tag representing a single EAD XML record.
+
+        Returns:
+            tuple[Tag, Tag]: The archdesc element with level="collection" and its
+                did child element, in that order.
+
+        Raises:
+            SkippedRecordEvent: If either element is missing from the record.
+        """
+        collection_description = source_record.metadata.find(
+            "archdesc", level="collection"
+        )
+        if not collection_description:
+            message = (
+                "Record skipped because key information is missing: "
+                '.'
+            )
+            raise SkippedRecordEvent(message)
+
+        collection_description_did = collection_description.did
+        if not collection_description_did:
+            message = "Record skipped because key information is missing: ."
+            raise SkippedRecordEvent(message)
+
+        return collection_description, collection_description_did
+
     @classmethod
     def get_main_titles(cls, source_record: Tag) -> list[str]:
         """