From 75ff143a2541b5c34ecd9e7f0243799abc806900 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 1 Apr 2022 14:02:19 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20modstool:=20Fix=20handling=20mul?= =?UTF-8?q?tiple=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There may be multiple mods:language tags with mods:languageTerm children; these should be merged into one language_languageTerm column --- qurator/modstool/modstool.py | 4 ++-- qurator/modstool/tests/test_modstool.py | 19 +++++++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/qurator/modstool/modstool.py b/qurator/modstool/modstool.py index 9d8f6e7..e122922 100755 --- a/qurator/modstool/modstool.py +++ b/qurator/modstool/modstool.py @@ -320,8 +320,8 @@ def mods_to_dict(mods, raise_errors=True): value['language_{}'.format(sub_tag)] = s elif tag == '{http://www.loc.gov/mods/v3}languageTerm': value['languageTerm'] = TagGroup(tag, group) \ - .is_singleton().has_attributes({'authority': 'iso639-2b', 'type': 'code'}) \ - .text() + .has_attributes({'authority': 'iso639-2b', 'type': 'code'}) \ + .text_set() elif tag == '{http://www.loc.gov/mods/v3}scriptTerm': value['scriptTerm'] = TagGroup(tag, group) \ .fix_script_term() \ diff --git a/qurator/modstool/tests/test_modstool.py b/qurator/modstool/tests/test_modstool.py index 8fb0864..10ec3b6 100644 --- a/qurator/modstool/tests/test_modstool.py +++ b/qurator/modstool/tests/test_modstool.py @@ -6,12 +6,23 @@ from .. 
import mods_to_dict, flatten def dict_fromstring(x): - """Helper function to parse a MODS XML string to a flattened dict""" - return flatten(mods_to_dict(ET.fromstring(x))) + """Helper function to parse a MODS XML string to a flattened dict""" + return flatten(mods_to_dict(ET.fromstring(x))) -def test_languageTerm(): +def test_single_language_languageTerm(): + d = dict_fromstring(""" + + + lat + ger + + + """) + assert d['language_languageTerm'] == {'ger', 'lat'} + +def test_multitple_language_languageTerm(): """ - Different languages have multiple mods:language elements. + Different languages MAY have multiple mods:language elements. See MODS-AP 2.3.1 """ d = dict_fromstring("""