Skip to content

Commit

Permalink
[wip] get_alternate_titles
Browse files Browse the repository at this point in the history
  • Loading branch information
jonavellecuerdo committed Jul 19, 2024
1 parent 82584bb commit e190ab3
Show file tree
Hide file tree
Showing 2 changed files with 152 additions and 46 deletions.
94 changes: 94 additions & 0 deletions tests/sources/xml/test_marc.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# ruff: noqa: E501
import logging

from bs4 import BeautifulSoup # type: ignore[import-untyped]
Expand All @@ -6,6 +7,32 @@
from transmogrifier.sources.xml.marc import Marc


def create_marc_source_record_stub(datafield_insert: str = ""):
    """
    Build a minimal parsed MARC XML collection for use in unit tests.

    The stub contains one record with a leader and the 008 and 001
    controlfields; the caller-supplied XML is placed after the controlfields.

    Args:
        datafield_insert (str): XML for one or more MARC 'datafield' elements.
            Leave as "" (the default) to get a record with no datafields,
            which is what the "missing field" tests need.

    Returns:
        The BeautifulSoup object produced by parsing the populated XML with
        the "xml" parser.
    """
    record_template = """
<collection>
<record>
<leader>03282nam 2200721Ki 4500</leader>
<controlfield tag="008">170906s2016 fr mun| o e zxx d</controlfield>
<controlfield tag="001">990027185640106761</controlfield>
{datafield_insert}
</record>
</collection>
"""
    populated_xml = record_template.format(datafield_insert=datafield_insert)
    return BeautifulSoup(populated_xml, "xml")


def test_marc_record_all_fields_transform_correctly():
marc_xml_records = Marc.parse_source_file(
"tests/fixtures/marc/marc_record_all_fields.xml"
Expand Down Expand Up @@ -748,6 +775,73 @@ def test_marc_record_with_missing_optional_fields_transforms_correctly():
)


def test_get_alternate_titles_success():
    # One datafield per MARC tag handled by Marc.get_alternate_titles; the 246
    # value ends with a period to exercise trailing-punctuation stripping.
    datafields_xml = """
<datafield tag="130" ind1="0" ind2="0">
<subfield code="a">Main Entry</subfield>
<subfield code="d">Date 1</subfield>
<subfield code="d">Date 2</subfield>
</datafield>
<datafield tag="240" ind1="0" ind2="0">
<subfield code="a">Uniform</subfield>
<subfield code="d">Date 1</subfield>
<subfield code="d">Date 2</subfield>
</datafield>
<datafield tag="246" ind1="0" ind2="0">
<subfield code="a">Varying Form</subfield>
<subfield code="b">Of Title 1.</subfield>
</datafield>
<datafield tag="730" ind1="0" ind2="0">
<subfield code="a">Added Entry 2</subfield>
<subfield code="n">Part 1</subfield>
<subfield code="n">Part 2</subfield>
</datafield>
<datafield tag="740" ind1="0" ind2="0">
<subfield code="a">Added Entry 1</subfield>
<subfield code="n">Part 1</subfield>
<subfield code="n">Part 2</subfield>
</datafield>
"""
    marc_record = create_marc_source_record_stub(datafield_insert=datafields_xml)

    expected_titles = [
        timdex.AlternateTitle(
            value="Main Entry Date 1 Date 2",
            kind="Preferred Title",
        ),
        timdex.AlternateTitle(
            value="Uniform Date 1 Date 2",
            kind="Preferred Title",
        ),
        timdex.AlternateTitle(
            value="Varying Form Of Title 1",
            kind="Varying Form of Title",
        ),
        timdex.AlternateTitle(
            value="Added Entry 2 Part 1 Part 2",
            kind="Preferred Title",
        ),
        timdex.AlternateTitle(
            value="Added Entry 1 Part 1 Part 2",
            kind="Uncontrolled Related/Analytical Title",
        ),
    ]
    assert Marc.get_alternate_titles(marc_record) == expected_titles


def test_get_alternate_titles_transforms_correctly_if_fields_blank():
    # A 130 datafield is present, but every subfield in it is empty.
    blank_datafield_xml = """
<datafield tag="130" ind1="0" ind2="0">
<subfield code="a"></subfield>
<subfield code="d"></subfield>
<subfield code="d"></subfield>
</datafield>
"""
    marc_record = create_marc_source_record_stub(
        datafield_insert=blank_datafield_xml
    )
    result = Marc.get_alternate_titles(marc_record)
    assert result is None


def test_get_alternate_titles_transforms_correctly_if_fields_missing():
    # The stub's default argument inserts no datafield elements at all.
    record_without_datafields = create_marc_source_record_stub()
    result = Marc.get_alternate_titles(record_without_datafields)
    assert result is None


def test_marc_record_missing_leader_logs_error(caplog):
marc_xml_records = Marc.parse_source_file(
"tests/fixtures/marc/marc_record_missing_leader.xml"
Expand Down
104 changes: 58 additions & 46 deletions transmogrifier/sources/xml/marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
Overrides metaclass get_optional_fields() method.
Args:
xml: A BeautifulSoup Tag representing a single MARC XML record.
source_record: A BeautifulSoup Tag representing a single MARC XML record.
"""
fields: dict = {}

Expand All @@ -57,51 +57,8 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
logger.error(message)
return None

# alternate_titles
alternate_title_marc_fields = [
{
"tag": "130",
"subfields": "adfghklmnoprst",
"kind": "Preferred Title",
},
{
"tag": "240",
"subfields": "adfghklmnoprs",
"kind": "Preferred Title",
},
{
"tag": "246",
"subfields": "abfghinp",
"kind": "Varying Form of Title",
},
{
"tag": "730",
"subfields": "adfghiklmnoprst",
"kind": "Preferred Title",
},
{
"tag": "740",
"subfields": "anp",
"kind": "Uncontrolled Related/Analytical Title",
},
]
for alternate_title_marc_field in alternate_title_marc_fields:
for datafield in source_record.find_all(
"datafield", tag=alternate_title_marc_field["tag"]
):
if alternate_title_value := (
self.create_subfield_value_string_from_datafield(
datafield,
alternate_title_marc_field["subfields"],
" ",
)
):
fields.setdefault("alternate_titles", []).append(
timdex.AlternateTitle(
value=alternate_title_value.rstrip(" .,/"),
kind=alternate_title_marc_field["kind"],
)
)
# alternate_titles
fields["alternate_titles"] = self.get_alternate_titles(source_record)

# call_numbers
call_number_marc_fields = [
Expand Down Expand Up @@ -799,6 +756,61 @@ def loc_crosswalk_code_to_name(
)
return str(code_element.parent.find("name").string)

@classmethod
def get_alternate_titles(
    cls, source_record: Tag
) -> list[timdex.AlternateTitle] | None:
    """
    Retrieve alternate titles from MARC fields 130, 240, 246, 730, and 740.

    For every matching 'datafield' element, the subfields configured for that
    tag are combined by create_subfield_value_string_from_datafield() using
    " " as the separator, and trailing spaces, periods, commas, and slashes
    are then stripped. Datafields whose value is empty, either before or
    after stripping, are skipped.

    Args:
        source_record: A BeautifulSoup Tag representing a single MARC XML record.

    Returns:
        A list of timdex.AlternateTitle objects, grouped by MARC tag in the
        order the tags are configured below, or None if no alternate titles
        were found.
    """
    alternate_titles: list[timdex.AlternateTitle] = []
    alternate_title_marc_fields = [
        {
            "tag": "130",
            "subfields": "adfghklmnoprst",
            "kind": "Preferred Title",
        },
        {
            "tag": "240",
            "subfields": "adfghklmnoprs",
            "kind": "Preferred Title",
        },
        {
            "tag": "246",
            "subfields": "abfghinp",
            "kind": "Varying Form of Title",
        },
        {
            "tag": "730",
            "subfields": "adfghiklmnoprst",
            "kind": "Preferred Title",
        },
        {
            "tag": "740",
            "subfields": "anp",
            "kind": "Uncontrolled Related/Analytical Title",
        },
    ]
    for marc_field in alternate_title_marc_fields:
        for datafield in source_record.find_all(
            "datafield", tag=marc_field["tag"]
        ):
            # Resolve the helper through cls (not the hard-coded class name)
            # so that subclasses overriding it are honored.
            raw_value = cls.create_subfield_value_string_from_datafield(
                datafield,
                marc_field["subfields"],
                " ",
            )
            if not raw_value:
                continue
            # Strip trailing punctuation BEFORE the emptiness check so that a
            # datafield holding only punctuation/spaces does not produce an
            # AlternateTitle with a blank value.
            if title_value := raw_value.rstrip(" .,/"):
                alternate_titles.append(
                    timdex.AlternateTitle(
                        value=title_value,
                        kind=marc_field["kind"],
                    )
                )
    return alternate_titles or None

@staticmethod
def get_main_titles(xml: Tag) -> list[str]:
"""
Expand Down

0 comments on commit e190ab3

Please sign in to comment.