mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-06-08 03:10:15 +02:00
🐛 modstool: Handle multiple scriptTerms per language correctly
This commit is contained in:
parent
2259d01843
commit
5773b9c9b1
2 changed files with 17 additions and 4 deletions
|
@ -298,17 +298,26 @@ def mods_to_dict(mods, raise_errors=True):
|
|||
sub_dicts = [mods_to_dict(e) for e in group]
|
||||
sub_tags = {k for d in sub_dicts for k in d.keys()}
|
||||
for sub_tag in sub_tags:
|
||||
value['language_{}'.format(sub_tag)] = {d.get(sub_tag) for d in sub_dicts if d.get(sub_tag)}
|
||||
s = set()
|
||||
for d in sub_dicts:
|
||||
v = d.get(sub_tag)
|
||||
if v:
|
||||
# There could be multiple scriptTerms in one language element, e.g. Antiqua and Fraktur in a
|
||||
# German-language document.
|
||||
if isinstance(v, set):
|
||||
s.update(v)
|
||||
else:
|
||||
s.add(v)
|
||||
value['language_{}'.format(sub_tag)] = s
|
||||
elif tag == '{http://www.loc.gov/mods/v3}languageTerm':
|
||||
value['languageTerm'] = TagGroup(tag, group) \
|
||||
.is_singleton().has_attributes({'authority': 'iso639-2b', 'type': 'code'}) \
|
||||
.text()
|
||||
elif tag == '{http://www.loc.gov/mods/v3}scriptTerm':
|
||||
value['scriptTerm'] = TagGroup(tag, group) \
|
||||
.is_singleton() \
|
||||
.fix_script_term() \
|
||||
.has_attributes({'authority': 'iso15924', 'type': 'code'}) \
|
||||
.text()
|
||||
.text_set()
|
||||
elif tag == '{http://www.loc.gov/mods/v3}relatedItem':
|
||||
pass
|
||||
elif tag == '{http://www.loc.gov/mods/v3}name':
|
||||
|
|
|
@ -33,9 +33,13 @@ def test_scriptTerm():
|
|||
<mods:scriptTerm authority="iso15924" type="code">215</mods:scriptTerm>
|
||||
<mods:scriptTerm authority="iso15924" type="code">217</mods:scriptTerm>
|
||||
</mods:language>
|
||||
<mods:language>
|
||||
<mods:languageTerm authority="iso639-2b" type="code">lat</mods:languageTerm>
|
||||
<mods:scriptTerm authority="iso15924" type="code">216</mods:scriptTerm>
|
||||
</mods:language>
|
||||
</mods:mods>
|
||||
""")
|
||||
assert d['language_scriptTerm'] == {'215', '217'}
|
||||
assert d['language_scriptTerm'] == {'215', '216', '217'}
|
||||
|
||||
def test_recordInfo():
|
||||
d = dict_fromstring("""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue