mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-08-09 17:49:52 +02:00
Merge branch 'master' of https://github.com/qurator-spk/mods4pandas
This commit is contained in:
commit
c8744829cf
2 changed files with 16 additions and 0 deletions
|
@ -269,6 +269,14 @@ class TagGroup:
|
||||||
counts = {f"{xpath_expr}-count": len(values)}
|
counts = {f"{xpath_expr}-count": len(values)}
|
||||||
return counts
|
return counts
|
||||||
|
|
||||||
|
def fix_recordIdentifier_source_zdb(self) -> TagGroup:
|
||||||
|
for e in self.group:
|
||||||
|
if e.get("type") == "zdb":
|
||||||
|
e.attrib["source"] = "zdb"
|
||||||
|
del e.attrib["type"]
|
||||||
|
warnings.warn("Fixed recordIdentifier type 'zdb' to source")
|
||||||
|
return self
|
||||||
|
|
||||||
|
|
||||||
def sorted_groupby(iterable, key=None):
|
def sorted_groupby(iterable, key=None):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -162,12 +162,16 @@ def mods_to_dict(mods, raise_errors=True):
|
||||||
.descend(raise_errors)
|
.descend(raise_errors)
|
||||||
)
|
)
|
||||||
elif tag == "{http://www.loc.gov/mods/v3}recordIdentifier":
|
elif tag == "{http://www.loc.gov/mods/v3}recordIdentifier":
|
||||||
|
def no_uuid(record_identifier):
|
||||||
|
return record_identifier.attrib.get("type") != "uuid"
|
||||||
|
|
||||||
# By default we assume source="gbv-ppn" mods:recordIdentifiers (= PPNs),
|
# By default we assume source="gbv-ppn" mods:recordIdentifiers (= PPNs),
|
||||||
# however, in mods:relatedItems, there may be source="dnb-ppns",
|
# however, in mods:relatedItems, there may be source="dnb-ppns",
|
||||||
# which we need to distinguish by using a separate field name.
|
# which we need to distinguish by using a separate field name.
|
||||||
try:
|
try:
|
||||||
value["recordIdentifier"] = (
|
value["recordIdentifier"] = (
|
||||||
TagGroup(tag, group)
|
TagGroup(tag, group)
|
||||||
|
.filter(no_uuid)
|
||||||
.is_singleton()
|
.is_singleton()
|
||||||
.has_attributes({"source": "gbv-ppn"})
|
.has_attributes({"source": "gbv-ppn"})
|
||||||
.text()
|
.text()
|
||||||
|
@ -320,6 +324,10 @@ def mods_to_dict(mods, raise_errors=True):
|
||||||
elif tag == "{http://www.loc.gov/mods/v3}mods":
|
elif tag == "{http://www.loc.gov/mods/v3}mods":
|
||||||
# XXX Ignore nested mods:mods for now (used in mods:subject)
|
# XXX Ignore nested mods:mods for now (used in mods:subject)
|
||||||
pass
|
pass
|
||||||
|
elif tag == "{http://www.loc.gov/mods/v3}issuance":
|
||||||
|
value["issuance"] = (
|
||||||
|
TagGroup(tag, group).is_singleton().has_no_attributes().text()
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
if raise_errors:
|
if raise_errors:
|
||||||
raise ValueError('Unknown tag "{}"'.format(tag))
|
raise ValueError('Unknown tag "{}"'.format(tag))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue