from lxml import etree as ET
import pytest
from qurator.mods4pandas.mods4pandas import mods_to_dict
from qurator.mods4pandas.lib import flatten
def dict_fromstring(x):
    """Parse the MODS XML string *x* and return it as a flattened dict."""
    # Step by step: XML text -> element tree -> nested dict -> flat dict.
    root = ET.fromstring(x)
    nested = mods_to_dict(root)
    return flatten(nested)
def test_single_language_languageTerm():
    """The 'language_languageTerm' entry must hold both language codes as a set."""
    # NOTE(review): as seen here the fixture holds only text values and no XML
    # markup; presumably the MODS element tags were lost - confirm against the
    # upstream test file.
    mods_xml = (
        "\n"
        "lat\n"
        "ger\n"
    )
    flat = dict_fromstring(mods_xml)
    expected_codes = {'ger', 'lat'}
    assert flat['language_languageTerm'] == expected_codes
def test_multitple_language_languageTerm():
    """
    Different languages MAY be given in multiple mods:language elements
    (see MODS-AP 2.3.1).

    The 'language_languageTerm' entry is expected to be the set of both codes.
    """
    # NOTE(review): as seen here the fixture holds only text values and no XML
    # markup; presumably the MODS element tags were lost - confirm against the
    # upstream test file.
    mods_xml = (
        "\n"
        "lat\n"
        "ger\n"
    )
    flat = dict_fromstring(mods_xml)
    expected_codes = {'ger', 'lat'}
    assert flat['language_languageTerm'] == expected_codes
def test_role_roleTerm():
    """The 'name0_role_roleTerm' entry must be the one-element set {'cmp'}."""
    # NOTE(review): as seen here the fixture holds only text values and no XML
    # markup; presumably the MODS element tags were lost - confirm against the
    # upstream test file.
    mods_xml = (
        "\n"
        "Wurm, Mary\n"
        "Mary\n"
        "078789583\n"
        "Wurm\n"
        "cmp\n"
    )
    flat = dict_fromstring(mods_xml)
    expected_roles = {'cmp'}
    assert flat['name0_role_roleTerm'] == expected_roles
def test_multiple_role_roleTerm():
    """
    Several mods:role/mods:roleTerm values should end up merged into a single
    column, here 'name0_role_roleTerm'.
    """
    # NOTE(review): as seen here the fixture holds only text values and no XML
    # markup; presumably the MODS element tags were lost - confirm against the
    # upstream test file.
    mods_xml = (
        "\n"
        "Wurm, Mary\n"
        "Mary\n"
        "078789583\n"
        "Wurm\n"
        "cmp\n"
        "aut\n"
    )
    flat = dict_fromstring(mods_xml)
    expected_roles = {'cmp', 'aut'}
    assert flat['name0_role_roleTerm'] == expected_roles
def test_scriptTerm():
    """
    The same language in different scripts has one mods:language element with
    multiple scriptTerms inside (see MODS-AP 2.3.1).

    The 'language_scriptTerm' entry is expected to be the set of all codes.
    """
    # NOTE(review): as seen here the fixture holds only text values and no XML
    # markup; presumably the MODS element tags were lost - confirm against the
    # upstream test file.
    mods_xml = (
        "\n"
        "ger\n"
        "215\n"
        "217\n"
        "lat\n"
        "216\n"
    )
    flat = dict_fromstring(mods_xml)
    expected_scripts = {'215', '216', '217'}
    assert flat['language_scriptTerm'] == expected_scripts
def test_recordInfo():
    """The 'recordInfo_recordIdentifier' entry must be the plain PPN string."""
    # NOTE(review): as seen here the fixture holds only text values and no XML
    # markup; presumably the MODS element tags were lost - confirm against the
    # upstream test file.
    mods_xml = (
        "\n"
        "PPN610714341\n"
    )
    flat = dict_fromstring(mods_xml)
    identifier = flat['recordInfo_recordIdentifier']
    assert identifier == 'PPN610714341'
def test_accessCondition():
    """The 'accessCondition-use and reproduction' entry must be 'UNKNOWN'."""
    # NOTE(review): as seen here the fixture holds only text values and no XML
    # markup; presumably the MODS element tags were lost - confirm against the
    # upstream test file.
    mods_xml = (
        "\n"
        "UNKNOWN\n"
    )
    flat = dict_fromstring(mods_xml)
    condition = flat['accessCondition-use and reproduction']
    assert condition == 'UNKNOWN'
def test_originInfo_no_event_type():
    """
    Expect exactly one UserWarning reporting a filtered originInfo element
    (one without eventType), and an empty result dict.
    """
    # NOTE(review): as seen here the fixture holds only text values and no XML
    # markup; presumably the MODS element tags were lost - confirm against the
    # upstream test file.
    mods_xml = (
        "\n"
        "Berlin\n"
    )
    with pytest.warns(UserWarning) as caught:
        flat = dict_fromstring(mods_xml)

    assert flat == {}  # result must be empty
    assert len(caught) == 1
    first_message = caught[0].message.args[0]
    assert first_message == 'Filtered {http://www.loc.gov/mods/v3}originInfo element (has no eventType)'
def test_relatedItem():
    """
    The 'relatedItem-original_recordInfo_recordIdentifier' entry must be the
    plain PPN string.
    """
    # NOTE(review): as seen here the fixture holds only text values and no XML
    # markup; presumably the MODS element tags were lost - confirm against the
    # upstream test file.
    mods_xml = (
        "\n"
        "PPN167755803\n"
    )
    flat = dict_fromstring(mods_xml)
    identifier = flat['relatedItem-original_recordInfo_recordIdentifier']
    assert identifier == 'PPN167755803'