From 5773b9c9b146a39e45d73a36fca6d918a885a29e Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 29 Aug 2019 15:42:13 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20modstool:=20Handle=20multiple=20?= =?UTF-8?q?scriptTerms=20per=20language=20correctly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/modstool/modstool.py | 15 ++++++++++++--- qurator/modstool/tests/test_modstool.py | 6 +++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/qurator/modstool/modstool.py b/qurator/modstool/modstool.py index e98e3c0..eecc5b8 100755 --- a/qurator/modstool/modstool.py +++ b/qurator/modstool/modstool.py @@ -298,17 +298,26 @@ def mods_to_dict(mods, raise_errors=True): sub_dicts = [mods_to_dict(e) for e in group] sub_tags = {k for d in sub_dicts for k in d.keys()} for sub_tag in sub_tags: - value['language_{}'.format(sub_tag)] = {d.get(sub_tag) for d in sub_dicts if d.get(sub_tag)} + s = set() + for d in sub_dicts: + v = d.get(sub_tag) + if v: + # There could be multiple scriptTerms in one language element, e.g. Antiqua and Fraktur in a + # German language document. + if isinstance(v, set): + s.update(v) + else: + s.add(v) + value['language_{}'.format(sub_tag)] = s elif tag == '{http://www.loc.gov/mods/v3}languageTerm': value['languageTerm'] = TagGroup(tag, group) \ .is_singleton().has_attributes({'authority': 'iso639-2b', 'type': 'code'}) \ .text() elif tag == '{http://www.loc.gov/mods/v3}scriptTerm': value['scriptTerm'] = TagGroup(tag, group) \ - .is_singleton() \ .fix_script_term() \ .has_attributes({'authority': 'iso15924', 'type': 'code'}) \ - .text() + .text_set() elif tag == '{http://www.loc.gov/mods/v3}relatedItem': pass elif tag == '{http://www.loc.gov/mods/v3}name': diff --git a/qurator/modstool/tests/test_modstool.py b/qurator/modstool/tests/test_modstool.py index 1b990a8..87a0b94 100644 --- a/qurator/modstool/tests/test_modstool.py +++ b/qurator/modstool/tests/test_modstool.py @@ -33,9 +33,13 @@ def test_scriptTerm(): 215 217 + + lat + 216 + """) - assert d['language_scriptTerm'] == {'215', '217'} + assert d['language_scriptTerm'] == {'215', '216', '217'} def test_recordInfo(): d = dict_fromstring("""