From 100b2a5e6c6e8f1e407875e6e2b74d9c60b77ff0 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Mon, 17 Apr 2023 19:21:43 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20=20Fix=20mods:relatedItem=20with?= =?UTF-8?q?=20mods:recordIdentifier=20source=3Ddnb-ppn?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mods:relatedItem may have a mods:recordIdentifier with source="dnb-ppn". This may happen for digitized works that have their original work in another library and use a DNB PPN there. Fixes gh-22. --- qurator/mods4pandas/mods4pandas.py | 8 +++++++- qurator/mods4pandas/tests/test_modstool.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/qurator/mods4pandas/mods4pandas.py b/qurator/mods4pandas/mods4pandas.py index 781c42c..45fb675 100755 --- a/qurator/mods4pandas/mods4pandas.py +++ b/qurator/mods4pandas/mods4pandas.py @@ -95,7 +95,13 @@ def mods_to_dict(mods, raise_errors=True): elif tag == '{http://www.loc.gov/mods/v3}recordInfo': value['recordInfo'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) elif tag == '{http://www.loc.gov/mods/v3}recordIdentifier': - value['recordIdentifier'] = TagGroup(tag, group).is_singleton().has_attributes({'source': 'gbv-ppn'}).text() + # By default we assume source="gbv-ppn" mods:recordIdentifiers (= PPNs), + # however, in mods:relatedItems, there may be source="dnb-ppn" recordIdentifiers, + # which we need to distinguish by using a separate field name. 
+ try: + value['recordIdentifier'] = TagGroup(tag, group).is_singleton().has_attributes({'source': 'gbv-ppn'}).text() + except ValueError: + value['recordIdentifier-dnb-ppn'] = TagGroup(tag, group).is_singleton().has_attributes({'source': 'dnb-ppn'}).text() elif tag == '{http://www.loc.gov/mods/v3}identifier': for e in group: if len(e.attrib) != 1: diff --git a/qurator/mods4pandas/tests/test_modstool.py b/qurator/mods4pandas/tests/test_modstool.py index 5d628a2..3fb6460 100644 --- a/qurator/mods4pandas/tests/test_modstool.py +++ b/qurator/mods4pandas/tests/test_modstool.py @@ -138,3 +138,16 @@ def test_relatedItem(): """) assert d['relatedItem-original_recordInfo_recordIdentifier'] == 'PPN167755803' + + # mods:relatedItem may also have source="dnb-ppn" recordIdentifiers: + d = dict_fromstring(""" + + + + 1236513355 + + + + """) + + assert d['relatedItem-original_recordInfo_recordIdentifier-dnb-ppn'] == '1236513355'