🐛 Handle multiple mods:role

master
Gerber, Mike 3 years ago
parent 75ff143a25
commit 9a8f7f69bd

@ -165,6 +165,25 @@ class TagGroup:
warnings.warn('Changed scriptTerm authority to lower case') warnings.warn('Changed scriptTerm authority to lower case')
return self return self
def merge_sub_tags_to_set(self, raise_errors=True):
    """
    Merge the sub-tags of all elements in this group into sets.

    Every element of ``self.group`` is converted with ``mods_to_dict``; the
    resulting dicts are then combined key by key, so that each key maps to
    the set of all values found for it across the elements.

    Unlike the chainable checks of this class, this returns a plain dict,
    not ``self``.

    :param raise_errors: passed on to ``mods_to_dict`` for every element.
        Defaults to True, which is also ``mods_to_dict``'s own default, so
        existing calls without the argument behave as before.
    :return: dict mapping each sub-tag key to a set of its values. A key
        whose values are all falsy is still present, with an empty set.
    """
    sub_dicts = [mods_to_dict(e, raise_errors) for e in self.group]
    sub_tags = {k for d in sub_dicts for k in d}

    value = {}
    for sub_tag in sub_tags:
        merged = set()
        for d in sub_dicts:
            v = d.get(sub_tag)
            if not v:
                # Missing key or empty value in this element: nothing to merge.
                continue
            # A single element may already carry a set, e.g. multiple
            # scriptTerms in one language element (Antiqua and Fraktur in a
            # German language document). Flatten instead of nesting.
            if isinstance(v, set):
                merged.update(v)
            else:
                merged.add(v)
        value[sub_tag] = merged
    return value
def sorted_groupby(iterable, key=None): def sorted_groupby(iterable, key=None):
""" """
@ -303,21 +322,8 @@ def mods_to_dict(mods, raise_errors=True):
k = 'genre-{}'.format(authority) if authority is not None else 'genre' k = 'genre-{}'.format(authority) if authority is not None else 'genre'
value[k] = {e.text for e in group if e.attrib.get('authority') == authority} value[k] = {e.text for e in group if e.attrib.get('authority') == authority}
elif tag == '{http://www.loc.gov/mods/v3}language': elif tag == '{http://www.loc.gov/mods/v3}language':
# Make languageTerm/scriptTerm sets value["language"] = TagGroup(tag, group) \
sub_dicts = [mods_to_dict(e) for e in group] .merge_sub_tags_to_set()
sub_tags = {k for d in sub_dicts for k in d.keys()}
for sub_tag in sub_tags:
s = set()
for d in sub_dicts:
v = d.get(sub_tag)
if v:
# There could be multiple scriptTerms in one language element, e.g. Antiqua and Fraktur in a
# German language document.
if isinstance(v, set):
s.update(v)
else:
s.add(v)
value['language_{}'.format(sub_tag)] = s
elif tag == '{http://www.loc.gov/mods/v3}languageTerm': elif tag == '{http://www.loc.gov/mods/v3}languageTerm':
value['languageTerm'] = TagGroup(tag, group) \ value['languageTerm'] = TagGroup(tag, group) \
.has_attributes({'authority': 'iso639-2b', 'type': 'code'}) \ .has_attributes({'authority': 'iso639-2b', 'type': 'code'}) \
@ -333,11 +339,13 @@ def mods_to_dict(mods, raise_errors=True):
for n, e in enumerate(group): for n, e in enumerate(group):
value['name{}'.format(n)] = mods_to_dict(e, raise_errors) value['name{}'.format(n)] = mods_to_dict(e, raise_errors)
elif tag == '{http://www.loc.gov/mods/v3}role': elif tag == '{http://www.loc.gov/mods/v3}role':
value['role'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) value["role"] = TagGroup(tag, group) \
.has_no_attributes() \
.merge_sub_tags_to_set()
elif tag == '{http://www.loc.gov/mods/v3}roleTerm': elif tag == '{http://www.loc.gov/mods/v3}roleTerm':
value['roleTerm'] = TagGroup(tag, group) \ value['roleTerm'] = TagGroup(tag, group) \
.is_singleton().has_attributes({'authority': 'marcrelator', 'type': 'code'}) \ .has_attributes({'authority': 'marcrelator', 'type': 'code'}) \
.text() .text_set()
elif tag == '{http://www.loc.gov/mods/v3}namePart': elif tag == '{http://www.loc.gov/mods/v3}namePart':
for e in group: for e in group:
if not e.attrib.get('type'): if not e.attrib.get('type'):

@ -33,6 +33,44 @@ def test_multitple_language_languageTerm():
""") """)
assert d['language_languageTerm'] == {'ger', 'lat'} assert d['language_languageTerm'] == {'ger', 'lat'}
def test_role_roleTerm():
    """
    A single mods:role/mods:roleTerm should yield a one-element set.
    """
    mods_xml = """
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
<mods:name type="personal" valueURI="http://d-nb.info/gnd/117357669">
<mods:displayForm>Wurm, Mary</mods:displayForm>
<mods:namePart type="given">Mary</mods:namePart>
<mods:nameIdentifier type="gbv-ppn">078789583</mods:nameIdentifier>
<mods:namePart type="family">Wurm</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="code">cmp</mods:roleTerm>
</mods:role>
</mods:name>
</mods:mods>
"""
    flattened = dict_fromstring(mods_xml)
    role_terms = flattened['name0_role_roleTerm']
    assert role_terms == {'cmp'}
def test_multiple_role_roleTerm():
    """
    Several mods:role elements under one mods:name, each with its own
    mods:roleTerm, should be merged into a single column holding all codes.
    """
    mods_xml = """
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
<mods:name type="personal" valueURI="http://d-nb.info/gnd/117357669">
<mods:displayForm>Wurm, Mary</mods:displayForm>
<mods:namePart type="given">Mary</mods:namePart>
<mods:nameIdentifier type="gbv-ppn">078789583</mods:nameIdentifier>
<mods:namePart type="family">Wurm</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="code">cmp</mods:roleTerm>
</mods:role>
<mods:role>
<mods:roleTerm authority="marcrelator" type="code">aut</mods:roleTerm>
</mods:role>
</mods:name>
</mods:mods>
"""
    flattened = dict_fromstring(mods_xml)
    role_terms = flattened['name0_role_roleTerm']
    assert role_terms == {'cmp', 'aut'}
def test_scriptTerm(): def test_scriptTerm():
""" """
Same language using different scripts have one mods:language, with multiple scriptTerms inside. Same language using different scripts have one mods:language, with multiple scriptTerms inside.

Loading…
Cancel
Save