mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-08-07 00:29:55 +02:00
Merge branch 'master' of https://github.com/qurator-spk/mods4pandas
This commit is contained in:
commit
c8744829cf
2 changed files with 16 additions and 0 deletions
|
@ -269,6 +269,14 @@ class TagGroup:
|
|||
counts = {f"{xpath_expr}-count": len(values)}
|
||||
return counts
|
||||
|
||||
def fix_recordIdentifier_source_zdb(self) -> TagGroup:
    """
    Move a "zdb" marker from the ``type`` attribute to ``source``.

    Every element of ``self.group`` whose ``type`` attribute equals "zdb"
    is changed in place: ``source`` is set to "zdb" and ``type`` is
    removed. One warning is emitted per changed element. All other
    elements are left untouched.

    Returns ``self`` so that further calls can be chained.
    """
    for element in self.group:
        # Guard clause: only elements typed "zdb" need fixing.
        if element.get("type") != "zdb":
            continue

        attributes = element.attrib
        attributes["source"] = "zdb"
        del attributes["type"]
        warnings.warn("Fixed recordIdentifier type 'zdb' to source")

    return self
|
||||
|
||||
|
||||
def sorted_groupby(iterable, key=None):
|
||||
"""
|
||||
|
|
|
@ -162,12 +162,16 @@ def mods_to_dict(mods, raise_errors=True):
|
|||
.descend(raise_errors)
|
||||
)
|
||||
elif tag == "{http://www.loc.gov/mods/v3}recordIdentifier":
|
||||
def no_uuid(record_identifier):
|
||||
return record_identifier.attrib.get("type") != "uuid"
|
||||
|
||||
# By default we assume source="gbv-ppn" mods:recordIdentifiers (= PPNs),
|
||||
# however, in mods:relatedItems, there may be source="dnb-ppns",
|
||||
# which we need to distinguish by using a separate field name.
|
||||
try:
|
||||
value["recordIdentifier"] = (
|
||||
TagGroup(tag, group)
|
||||
.filter(no_uuid)
|
||||
.is_singleton()
|
||||
.has_attributes({"source": "gbv-ppn"})
|
||||
.text()
|
||||
|
@ -320,6 +324,10 @@ def mods_to_dict(mods, raise_errors=True):
|
|||
elif tag == "{http://www.loc.gov/mods/v3}mods":
|
||||
# XXX Ignore nested mods:mods for now (used in mods:subject)
|
||||
pass
|
||||
elif tag == "{http://www.loc.gov/mods/v3}issuance":
|
||||
value["issuance"] = (
|
||||
TagGroup(tag, group).is_singleton().has_no_attributes().text()
|
||||
)
|
||||
else:
|
||||
if raise_errors:
|
||||
raise ValueError('Unknown tag "{}"'.format(tag))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue