From cbcadb420653e7e27d8c3d24101d34b91534d6dd Mon Sep 17 00:00:00 2001
From: jonavellecuerdo <cuerdojonavelle@gmail.com>
Date: Fri, 31 May 2024 14:27:13 -0400
Subject: [PATCH] Field method refactor for Ead transform

* Add field methods: languages, locations, notes,
  physical description, publishers, summary
---
 transmogrifier/sources/xml/ead.py | 197 +++++++++++++++++++-----------
 1 file changed, 123 insertions(+), 74 deletions(-)
diff --git a/transmogrifier/sources/xml/ead.py b/transmogrifier/sources/xml/ead.py
index 6fb3366..321f037 100644
--- a/transmogrifier/sources/xml/ead.py
+++ b/transmogrifier/sources/xml/ead.py
@@ -31,7 +31,6 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
 
         # <archdesc> and <did> elements are required when deriving optional fields
         collection_description = self._get_collection_description(source_record)
-        collection_description_did = self._get_collection_description_did(source_record)
 
         # <controlaccess> element is optional (used by multiple optional fields)
         control_access_elements = self._get_control_access(source_record)
@@ -71,86 +70,27 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
         fields["identifiers"] = self.get_identifiers(source_record)
 
         # languages
-        for langmaterial_element in collection_description_did.find_all(
-            "langmaterial", recursive=False
-        ):
-            for language_element in langmaterial_element.find_all("language"):
-                if language_value := self.create_string_from_mixed_value(
-                    language_element
-                ):
-                    fields.setdefault("languages", []).append(language_value)
+        fields["languages"] = self.get_languages(source_record)
 
         # links, omitted pending decision on duplicating source_link
 
         # literary_form field not used in EAD
 
         # locations
-        for control_access_element in control_access_elements:
-            for location_element in control_access_element.find_all("geogname"):
-                if location_value := self.create_string_from_mixed_value(
-                    location_element,
-                    separator=" ",
-                ):
-                    fields.setdefault("locations", []).append(
-                        timdex.Location(value=location_value)
-                    )
+        fields["locations"] = self.get_locations(source_record)
 
         # notes
-        for note_element in collection_description.find_all(
-            [
-                "bibliography",
-                "bioghist",
-                "scopecontent",
-            ],
-            recursive=False,
-        ):
-            subelement_tag = "bibref" if note_element.name == "bibliography" else "p"
-            note_value = []
-            for subelement in note_element.find_all(subelement_tag, recursive=False):
-                if subelement_value := self.create_string_from_mixed_value(
-                    subelement,
-                    separator=" ",
-                ):
-                    note_value.append(subelement_value)  # noqa: PERF401
-            if note_value:
-                note_head_element = note_element.find("head", string=True)
-                fields.setdefault("notes", []).append(
-                    timdex.Note(
-                        value=note_value,
-                        kind=(
-                            note_head_element.string
-                            if note_head_element
-                            else aspace_type_crosswalk.get(
-                                note_element.name, note_element.name
-                            )
-                        ),
-                    )
-                )
+        fields["notes"] = self.get_notes(source_record)
 
         # numbering field not used in EAD
 
         # physical_description
-        physical_descriptions = []
-        for physical_description_element in collection_description_did.find_all(
-            "physdesc", recursive=False
-        ):
-            if physical_description_value := self.create_string_from_mixed_value(
-                physical_description_element, separator=" "
-            ):
-                physical_descriptions.append(physical_description_value)  # noqa: PERF401
-        if physical_descriptions:
-            fields["physical_description"] = "; ".join(physical_descriptions)
+        fields["physical_description"] = self.get_physical_description(source_record)
 
         # publication_frequency field not used in EAD
 
         # publishers
-        if publication_element := collection_description_did.find(  # noqa: SIM102
-            "repository"
-        ):
-            if publication_value := self.create_string_from_mixed_value(
-                publication_element, separator=" "
-            ):
-                fields["publishers"] = [timdex.Publisher(name=publication_value)]
+        fields["publishers"] = self.get_publishers(source_record)
 
         # related_items
         for related_item_element in collection_description.find_all(
@@ -220,15 +160,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
                     )
 
         # summary
-        abstract_values = []
-        for abstract_element in collection_description_did.find_all(
-            "abstract", recursive=False
-        ):
-            if abstract_value := self.create_string_from_mixed_value(
-                abstract_element, separator=" "
-            ):
-                abstract_values.append(abstract_value)  # noqa: PERF401
-        fields["summary"] = abstract_values or None
+        fields["summary"] = self.get_summary(source_record)
 
         return fields
 
@@ -524,6 +456,123 @@ def get_identifiers(cls, source_record: Tag) -> list[timdex.Identifier] | None:
                 )
         return identifiers or None
 
+    @classmethod
+    def get_languages(cls, source_record: Tag) -> list[str] | None:
+        languages = []
+        collection_description_did = cls._get_collection_description_did(source_record)
+        for langmaterial_element in collection_description_did.find_all(
+            "langmaterial", recursive=False
+        ):
+            languages.extend(
+                [
+                    language
+                    for language_element in langmaterial_element.find_all("language")
+                    if (language := cls.create_string_from_mixed_value(language_element))
+                ]
+            )
+        return languages or None
+
+    @classmethod
+    def get_locations(cls, source_record: Tag) -> list[timdex.Location] | None:
+        locations = []
+        control_access_elements = cls._get_control_access(source_record)
+        for control_access_element in control_access_elements:
+            locations.extend(
+                [
+                    timdex.Location(value=location)
+                    for location_element in control_access_element.find_all("geogname")
+                    if (
+                        location := cls.create_string_from_mixed_value(
+                            location_element,
+                            separator=" ",
+                        )
+                    )
+                ]
+            )
+
+        return locations or None
+
+    @classmethod
+    def get_notes(cls, source_record: Tag) -> list[timdex.Note] | None:
+        notes = []
+        collection_description = cls._get_collection_description(source_record)
+        for note_element in collection_description.find_all(
+            [
+                "bibliography",
+                "bioghist",
+                "scopecontent",
+            ],
+            recursive=False,
+        ):
+            subelement_tag = "bibref" if note_element.name == "bibliography" else "p"
+            _notes = [
+                note
+                for subelement in note_element.find_all(subelement_tag, recursive=False)
+                if (
+                    note := cls.create_string_from_mixed_value(
+                        subelement,
+                        separator=" ",
+                    )
+                )
+            ]
+
+            if _notes:
+                notes.append(
+                    timdex.Note(
+                        value=_notes,
+                        kind=cls._get_note_kind(note_element),
+                    )
+                )
+        return notes or None
+
+    @classmethod
+    def _get_note_kind(cls, note_element: Tag) -> str:
+        if head_element := note_element.find("head", string=True):
+            return str(head_element.string)
+        return aspace_type_crosswalk.get(note_element.name, note_element.name)
+
+    @classmethod
+    def get_physical_description(cls, source_record: Tag) -> str | None:
+        collection_description_did = cls._get_collection_description_did(source_record)
+        physical_descriptions = [
+            physical_description
+            for physical_description_element in collection_description_did.find_all(
+                "physdesc", recursive=False
+            )
+            if (
+                physical_description := cls.create_string_from_mixed_value(
+                    physical_description_element, separator=" "
+                )
+            )
+        ]
+        return "; ".join(physical_descriptions) or None
+
+    @classmethod
+    def get_publishers(cls, source_record: Tag) -> list[timdex.Publisher] | None:
+        collection_description_did = cls._get_collection_description_did(source_record)
+        if (publication_element := collection_description_did.find("repository")) and (
+            publication_value := cls.create_string_from_mixed_value(
+                publication_element, separator=" "
+            )
+        ):
+            return [timdex.Publisher(name=publication_value)]
+        return None
+
+    @classmethod
+    def get_summary(cls, source_record: Tag) -> list[str] | None:
+        collection_description_did = cls._get_collection_description_did(source_record)
+        return [
+            abstract
+            for abstract_element in collection_description_did.find_all(
+                "abstract", recursive=False
+            )
+            if (
+                abstract := cls.create_string_from_mixed_value(
+                    abstract_element, separator=" "
+                )
+            )
+        ] or None
+
     @classmethod
     def get_main_titles(cls, source_record: Tag) -> list[str]:
         """