1
0
Fork 0
mirror of https://github.com/qurator-spk/modstool.git synced 2025-08-07 00:29:55 +02:00
This commit is contained in:
Mike Gerber 2025-08-06 20:19:46 +02:00
commit c8744829cf
2 changed files with 16 additions and 0 deletions

View file

@ -269,6 +269,14 @@ class TagGroup:
counts = {f"{xpath_expr}-count": len(values)}
return counts
def fix_recordIdentifier_source_zdb(self) -> TagGroup:
    """
    Move a misplaced "zdb" marker from the ``type`` to the ``source`` attribute.

    Every element of ``self.group`` whose ``type`` attribute equals ``"zdb"``
    gets ``source="zdb"`` set and its ``type`` attribute removed; a warning is
    emitted for each element changed this way. Elements are modified in place.

    Returns ``self`` so the call can be chained.
    """
    # NOTE(review): indentation was not visible in the reviewed excerpt; the
    # warning is assumed to belong to the "zdb" branch -- confirm.
    for element in self.group:
        if element.get("type") != "zdb":
            # Nothing to repair on this element.
            continue
        attributes = element.attrib
        attributes["source"] = "zdb"
        del attributes["type"]
        warnings.warn("Fixed recordIdentifier type 'zdb' to source")
    return self
def sorted_groupby(iterable, key=None):
"""

View file

@ -162,12 +162,16 @@ def mods_to_dict(mods, raise_errors=True):
.descend(raise_errors)
)
elif tag == "{http://www.loc.gov/mods/v3}recordIdentifier":
def no_uuid(record_identifier):
    """Return True unless the element's ``type`` attribute is ``"uuid"``."""
    identifier_type = record_identifier.attrib.get("type")
    return identifier_type != "uuid"
# By default we assume source="gbv-ppn" mods:recordIdentifiers (= PPNs),
# however, in mods:relatedItems, there may be source="dnb-ppns",
# which we need to distinguish by using a separate field name.
try:
value["recordIdentifier"] = (
TagGroup(tag, group)
.filter(no_uuid)
.is_singleton()
.has_attributes({"source": "gbv-ppn"})
.text()
@ -320,6 +324,10 @@ def mods_to_dict(mods, raise_errors=True):
elif tag == "{http://www.loc.gov/mods/v3}mods":
# XXX Ignore nested mods:mods for now (used in mods:subject)
pass
elif tag == "{http://www.loc.gov/mods/v3}issuance":
value["issuance"] = (
TagGroup(tag, group).is_singleton().has_no_attributes().text()
)
else:
if raise_errors:
raise ValueError('Unknown tag "{}"'.format(tag))