diff --git a/src/mods4pandas/lib.py b/src/mods4pandas/lib.py
index 0cf0407..4bf182f 100644
--- a/src/mods4pandas/lib.py
+++ b/src/mods4pandas/lib.py
@@ -269,6 +269,14 @@ class TagGroup:
         counts = {f"{xpath_expr}-count": len(values)}
         return counts
 
+    def fix_recordIdentifier_source_zdb(self) -> TagGroup:
+        for e in self.group:
+            if e.get("type") == "zdb":
+                e.attrib["source"] = "zdb"
+                del e.attrib["type"]
+                warnings.warn("Fixed recordIdentifier type 'zdb' to source")
+        return self
+
 
 def sorted_groupby(iterable, key=None):
     """
diff --git a/src/mods4pandas/mods4pandas.py b/src/mods4pandas/mods4pandas.py
index 3d71268..2d8c8c7 100755
--- a/src/mods4pandas/mods4pandas.py
+++ b/src/mods4pandas/mods4pandas.py
@@ -162,12 +162,16 @@ def mods_to_dict(mods, raise_errors=True):
                 .descend(raise_errors)
             )
         elif tag == "{http://www.loc.gov/mods/v3}recordIdentifier":
+            def no_uuid(record_identifier):
+                return record_identifier.attrib.get("type") != "uuid"
+
             # By default we assume source="gbv-ppn" mods:recordIdentifiers (= PPNs),
             # however, in mods:relatedItems, there may be source="dnb-ppns",
             # which we need to distinguish by using a separate field name.
             try:
                 value["recordIdentifier"] = (
                     TagGroup(tag, group)
+                    .filter(no_uuid)
                     .is_singleton()
                     .has_attributes({"source": "gbv-ppn"})
                     .text()
@@ -320,6 +324,10 @@ def mods_to_dict(mods, raise_errors=True):
         elif tag == "{http://www.loc.gov/mods/v3}mods":
             # XXX Ignore nested mods:mods for now (used in mods:subject)
             pass
+        elif tag == "{http://www.loc.gov/mods/v3}issuance":
+            value["issuance"] = (
+                TagGroup(tag, group).is_singleton().has_no_attributes().text()
+            )
         else:
             if raise_errors:
                 raise ValueError('Unknown tag "{}"'.format(tag))