diff --git a/qurator/mods4pandas/mods4pandas.py b/qurator/mods4pandas/mods4pandas.py index 781c42c..45fb675 100755 --- a/qurator/mods4pandas/mods4pandas.py +++ b/qurator/mods4pandas/mods4pandas.py @@ -95,7 +95,13 @@ def mods_to_dict(mods, raise_errors=True): elif tag == '{http://www.loc.gov/mods/v3}recordInfo': value['recordInfo'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) elif tag == '{http://www.loc.gov/mods/v3}recordIdentifier': - value['recordIdentifier'] = TagGroup(tag, group).is_singleton().has_attributes({'source': 'gbv-ppn'}).text() + # By default we assume source="gbv-ppn" mods:recordIdentifiers (= PPNs), + # however, in mods:relatedItems, there may be source="dnb-ppn", + # which we need to distinguish by using a separate field name. + try: + value['recordIdentifier'] = TagGroup(tag, group).is_singleton().has_attributes({'source': 'gbv-ppn'}).text() + except ValueError: + value['recordIdentifier-dnb-ppn'] = TagGroup(tag, group).is_singleton().has_attributes({'source': 'dnb-ppn'}).text() elif tag == '{http://www.loc.gov/mods/v3}identifier': for e in group: if len(e.attrib) != 1: diff --git a/qurator/mods4pandas/tests/test_modstool.py b/qurator/mods4pandas/tests/test_modstool.py index 5d628a2..3fb6460 100644 --- a/qurator/mods4pandas/tests/test_modstool.py +++ b/qurator/mods4pandas/tests/test_modstool.py @@ -138,3 +138,16 @@ def test_relatedItem(): """) assert d['relatedItem-original_recordInfo_recordIdentifier'] == 'PPN167755803' + + # mods:relatedItem may also have source="dnb-ppn" recordIdentifiers: + d = dict_fromstring(""" + + + + 1236513355 + + + + """) + + assert d['relatedItem-original_recordInfo_recordIdentifier-dnb-ppn'] == '1236513355'