Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exploratory field method refactor #166

Draft
wants to merge 4 commits into
base: field-method-refactor
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 53 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from transmogrifier.config import SOURCES, load_external_config
from transmogrifier.sources.jsontransformer import JSONTransformer
from transmogrifier.sources.xml.datacite import Datacite
from transmogrifier.sources.xml.dspace_dim import DspaceDim
from transmogrifier.sources.xmltransformer import XMLTransformer


Expand Down Expand Up @@ -43,13 +44,24 @@ def runner():
return CliRunner()


# aardvark ##########################


@pytest.fixture
def aardvark_records():
    """Parse the Aardvark JSONL fixture file with JSONTransformer.

    Returns whatever ``JSONTransformer.parse_source_file`` produces for
    the fixture path below.
    """
    fixture_path = "tests/fixtures/aardvark_records.jsonl"
    return JSONTransformer.parse_source_file(fixture_path)


@pytest.fixture
def aardvark_record_all_fields():
    """Parse the all-fields Aardvark JSONL fixture file with JSONTransformer.

    Returns whatever ``JSONTransformer.parse_source_file`` produces for
    the fixture path below.
    """
    fixture_path = "tests/fixtures/aardvark/aardvark_record_all_fields.jsonl"
    return JSONTransformer.parse_source_file(fixture_path)


# datacite ##########################


@pytest.fixture
def datacite_records():
return XMLTransformer.parse_source_file(
Expand All @@ -65,9 +77,42 @@ def datacite_record_all_fields():
return Datacite("cool-repo", source_records)


# dspace_dim ##########################


@pytest.fixture
def aardvark_records():
return JSONTransformer.parse_source_file("tests/fixtures/aardvark_records.jsonl")
def dspace_dim_record_all_fields():
    """Return the first record parsed from the all-fields DSpace DIM fixture.

    The fixture file is parsed with ``DspaceDim.parse_source_file`` and only
    the first item of the resulting iterator is returned.
    """
    fixture_path = "tests/fixtures/dspace/dspace_dim_record_all_fields.xml"
    return next(DspaceDim.parse_source_file(fixture_path))


@pytest.fixture
def dspace_dim_record_attribute_and_subfield_variations():
    """Return the first record from the attribute/subfield variations fixture.

    The fixture file is parsed with ``DspaceDim.parse_source_file`` and only
    the first item of the resulting iterator is returned.
    """
    fixture_path = (
        "tests/fixtures/dspace/dspace_dim_record_attribute_and_subfield_variations.xml"
    )
    return next(DspaceDim.parse_source_file(fixture_path))


@pytest.fixture
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unlike the _attribute_and_subfield_variations fixtures (which I didn't touch, though I have concerns about how useful they are), the _errors fixtures are intended to hold all of the edge cases that would trigger logging or alternate behavior. Keeping them all in one fixture that is shared by multiple edge-case tests (e.g. test_get_dates_invalid_date_range_skipped) should keep the test suite cleaner. I'm open to other names for this fixture.

def dspace_dim_record_optional_fields_blank():
source_records = DspaceDim.parse_source_file(
"tests/fixtures/dspace/dspace_dim_record_optional_fields_blank.xml"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably rename both this fixture and _optional_fields_missing during the refactor.

)
return next(source_records)


@pytest.fixture
def dspace_dim_record_optional_fields_missing():
    """Return the first record from the optional-fields-missing DIM fixture.

    The fixture file is parsed with ``DspaceDim.parse_source_file`` and only
    the first item of the resulting iterator is returned.
    """
    fixture_path = (
        "tests/fixtures/dspace/dspace_dim_record_optional_fields_missing.xml"
    )
    return next(DspaceDim.parse_source_file(fixture_path))


# marc ##########################


@pytest.fixture
Expand All @@ -80,11 +125,17 @@ def marc_content_type_crosswalk():
return load_external_config("config/marc_content_type_crosswalk.json", "json")


# oaidc ##########################


@pytest.fixture
def oai_pmh_records():
    """Parse the OAI-PMH XML fixture file with XMLTransformer.

    Returns whatever ``XMLTransformer.parse_source_file`` produces for
    the fixture path below.
    """
    fixture_path = "tests/fixtures/oai_pmh_records.xml"
    return XMLTransformer.parse_source_file(fixture_path)


# timdex ##########################


@pytest.fixture
def timdex_record_required_fields():
return timdex.TimdexRecord(
Expand Down
89 changes: 88 additions & 1 deletion tests/sources/xml/test_dspace_dim.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,28 @@
# ruff: noqa: E501
from bs4 import BeautifulSoup

import transmogrifier.models as timdex
from transmogrifier.sources.xml.dspace_dim import DspaceDim


def create_dspace_dim_source_record_stub(xml_insert: str) -> BeautifulSoup:
    """Wrap an XML snippet in a minimal DIM record document and parse it.

    Args:
        xml_insert: XML text interpolated verbatim inside the ``<dim:dim>``
            element of the template below.

    Returns:
        The full ``<records>`` document parsed by BeautifulSoup with the
        "xml" parser.
    """
    return BeautifulSoup(
        f"""
<records>
<record>
<metadata>
<dim:dim xmlns:dim="http://www.dspace.org/xmlns/dspace/dim"
xmlns:doc="http://www.lyncode.com/xoai"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.dspace.org/xmlns/dspace/dim http://www.dspace.org/schema/dim.xsd">
{xml_insert}
</dim:dim>
</metadata>
</record>
</records>
""",
        "xml",
    )


def test_dspace_dim_transform_with_all_fields_transforms_correctly():
source_records = DspaceDim.parse_source_file(
"tests/fixtures/dspace/dspace_dim_record_all_fields.xml"
Expand Down Expand Up @@ -133,7 +154,7 @@ def test_dspace_dim_transform_with_all_fields_transforms_correctly():

def test_dspace_dim_transform_with_attribute_variations_transforms_correctly():
source_records = DspaceDim.parse_source_file(
"tests/fixtures/dspace/dspace_dim_record_attribute_variations.xml"
"tests/fixtures/dspace/dspace_dim_record_attribute_and_subfield_variations.xml"
)
output_records = DspaceDim("cool-repo", source_records)
assert next(output_records) == timdex.TimdexRecord(
Expand Down Expand Up @@ -216,3 +237,69 @@ def test_dspace_dim_transform_with_optional_fields_missing_transforms_correctly(
format="electronic resource",
content_type=["Not specified"],
)


def test_get_contents_success():
    """Expect get_contents to return the text of a tableofcontents field."""
    field_xml = """
<dim:field mdschema="dc" element="description" qualifier="tableofcontents" lang="en">Chapter 1</dim:field>
"""
    stub_record = create_dspace_dim_source_record_stub(field_xml)
    contents = DspaceDim.get_contents(stub_record)
    assert contents == ["Chapter 1"]


def test_get_contents_transforms_correctly_if_fields_blank(
    dspace_dim_record_optional_fields_blank,
):
    """Expect get_contents to return an empty list for the blank-fields record."""
    contents = DspaceDim.get_contents(dspace_dim_record_optional_fields_blank)
    assert contents == []


def test_get_contents_transforms_correctly_if_fields_missing(
    dspace_dim_record_optional_fields_missing,
):
    """Expect get_contents to return an empty list for the missing-fields record."""
    contents = DspaceDim.get_contents(dspace_dim_record_optional_fields_missing)
    assert contents == []


def test_get_dates_success():
    """Expect get_dates to build Date objects from date and coverage fields.

    The stub record carries two temporal coverage fields (one free-text, one
    slash-separated range), three date fields and one identifier field; the
    assertion pins the exact list and order that get_dates should return.
    """
    fields_xml = """
<dim:field mdschema="dc" element="coverage" qualifier="temporal">1201-01-01 - 1965-12-21</dim:field>
<dim:field mdschema="dc" element="coverage" qualifier="temporal">1201-01-01/1965-12-21</dim:field>
<dim:field mdschema="dc" element="date" qualifier="accessioned">2009-01-08T16:24:37Z</dim:field>
<dim:field mdschema="dc" element="date" qualifier="available">2009-01-08T16:24:37Z</dim:field>
<dim:field mdschema="dc" element="date" qualifier="issued">2002-11</dim:field>
<dim:field mdschema="dc" element="identifier" qualifier="uri">https://hdl.handle.net/1912/2641</dim:field>
"""
    stub_record = create_dspace_dim_source_record_stub(fields_xml)
    # Call the method under test before building the expectation, matching
    # the evaluation order of a plain `assert actual == [...]`.
    actual_dates = DspaceDim.get_dates(stub_record, "abc123")
    coverage_range = timdex.DateRange(gte="1201-01-01", lte="1965-12-21")
    expected_dates = [
        timdex.Date(kind="accessioned", value="2009-01-08T16:24:37Z"),
        timdex.Date(kind="available", value="2009-01-08T16:24:37Z"),
        timdex.Date(kind="Publication date", value="2002-11"),
        timdex.Date(kind="coverage", note="1201-01-01 - 1965-12-21"),
        timdex.Date(kind="coverage", range=coverage_range),
    ]
    assert actual_dates == expected_dates


def test_get_dates_transforms_correctly_if_fields_blank(
    dspace_dim_record_optional_fields_blank,
):
    """Expect get_dates to return an empty list for the blank-fields record."""
    dates = DspaceDim.get_dates(dspace_dim_record_optional_fields_blank, "abc123")
    assert dates == []


def test_get_dates_transforms_correctly_if_fields_missing(
    dspace_dim_record_optional_fields_missing,
):
    """Expect get_dates to return an empty list for the missing-fields record."""
    dates = DspaceDim.get_dates(dspace_dim_record_optional_fields_missing, "abc123")
    assert dates == []


def test_get_dates_invalid_date_range_skipped():
    """Expect get_dates to return nothing for a range whose start follows its end."""
    field_xml = """
<dim:field element="coverage" qualifier="temporal">2020-01-02/2019-01-01
</dim:field>
"""
    stub_record = create_dspace_dim_source_record_stub(field_xml)
    dates = DspaceDim.get_dates(stub_record, "abc123")
    assert dates == []
Loading
Loading