🐛 mods4pandas: Handle multivolume_work without structMap TYPE='PHYSICAL'

master
Gerber, Mike 1 year ago
parent 0acaa83163
commit 6226618f40

@ -270,7 +270,11 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
structMap_LOGICAL = mets.find('./mets:structMap[@TYPE="LOGICAL"]', ns) structMap_LOGICAL = mets.find('./mets:structMap[@TYPE="LOGICAL"]', ns)
fileSec = mets.find('./mets:fileSec', ns) fileSec = mets.find('./mets:fileSec', ns)
if structMap_PHYSICAL is None: if structMap_PHYSICAL is None:
raise ValueError("No structMap[@TYPE='PHYSICAL'] found") # This is expected in a multivolume work!
if structMap_LOGICAL.find('./mets:div[@TYPE="multivolume_work"]', ns) is not None:
return []
else:
raise ValueError("No structMap[@TYPE='PHYSICAL'] found (but not a multivolume work)")
if structMap_LOGICAL is None: if structMap_LOGICAL is None:
raise ValueError("No structMap[@TYPE='LOGICAL'] found") raise ValueError("No structMap[@TYPE='LOGICAL'] found")
if fileSec is None: if fileSec is None:

@ -0,0 +1,114 @@
<?xml version="1.0" encoding="UTF-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:lc/xmlns/premis-v2 http://www.loc.gov/standards/premis/v2/premis-v2-0.xsd http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd http://www.loc.gov/mix/v10 http://www.loc.gov/standards/mix/mix10/mix10.xsd">
<mets:metsHdr CREATEDATE="2019-02-01T13:50:33">
<mets:agent OTHERTYPE="SOFTWARE" ROLE="CREATOR" TYPE="OTHER">
<mets:name>Goobi - UGH-1.11.1-v1.11.0-11-gbafb11b - 16November2015</mets:name>
<mets:note>Goobi</mets:note>
</mets:agent>
</mets:metsHdr>
<mets:dmdSec ID="DMDLOG_0000">
<mets:mdWrap MDTYPE="MODS">
<mets:xmlData>
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
<mods:originInfo eventType="publication">
<mods:place>
<mods:placeTerm type="text">Herborn</mods:placeTerm>
</mods:place>
<mods:publisher>Buchhandlung des Nassauischen Colportagevereins</mods:publisher>
<mods:dateIssued encoding="iso8601" keyDate="yes" point="start">1916</mods:dateIssued>
</mods:originInfo>
<mods:originInfo eventType="digitization">
<mods:place>
<mods:placeTerm type="text">Berlin</mods:placeTerm>
</mods:place>
<mods:publisher>Staatsbibliothek zu Berlin - Preußischer Kulturbesitz, Germany</mods:publisher>
<mods:edition>[Electronic ed.]</mods:edition>
</mods:originInfo>
<mods:classification authority="ZVDD">Krieg 1914-1918</mods:classification>
<mods:classification authority="ZVDD">Historische Drucke</mods:classification>
<mods:recordInfo>
<mods:recordIdentifier source="gbv-ppn">PPN717884805</mods:recordIdentifier>
</mods:recordInfo>
<mods:identifier type="purl">http://resolver.staatsbibliothek-berlin.de/SBB00008D1E00000000</mods:identifier>
<mods:relatedItem type="original">
<mods:recordInfo>
<mods:recordIdentifier source="gbv-ppn">PPN242046452</mods:recordIdentifier>
</mods:recordInfo>
</mods:relatedItem>
<mods:titleInfo>
<mods:title>Die Predigt des Evangeliums in der Zeitenwende</mods:title>
<mods:subTitle>Erläuterungen und Dispositionen zu den altkirchlichen und den Eisenacher Perikopen und zu freien Texten unter besonderer Berücksichtigung der Kriegszeit</mods:subTitle>
</mods:titleInfo>
<mods:note type="source characteristics">P_Drucke_Europeana1914-1918</mods:note>
<mods:subject authority="EC1418">
<mods:genre>book</mods:genre>
</mods:subject>
<mods:classification authority="sbb">Weltkr. 625</mods:classification>
<mods:language>
<mods:languageTerm authority="iso639-2b" type="code">ger</mods:languageTerm>
</mods:language>
<mods:relatedItem type="series">
<mods:titleInfo>
<mods:title>Europeana Collections 1914-1918</mods:title>
</mods:titleInfo>
</mods:relatedItem>
<mods:name type="personal">
<mods:role>
<mods:roleTerm authority="marcrelator" type="code">aut</mods:roleTerm>
</mods:role>
<mods:namePart type="family">Dunkmann</mods:namePart>
<mods:namePart type="given">Karl</mods:namePart>
<mods:displayForm>Dunkmann, Karl</mods:displayForm>
</mods:name>
<mods:physicalDescription>
<mods:digitalOrigin>reformatted digital</mods:digitalOrigin>
</mods:physicalDescription>
<mods:language>
<mods:scriptTerm authority="iso15924" type="code">217</mods:scriptTerm>
</mods:language>
<mods:subject authority="lcsh">
<mods:topic>sh2010119545</mods:topic>
<mods:topic>sh2008113843</mods:topic>
</mods:subject>
<mods:accessCondition type="use and reproduction">UNKNOWN</mods:accessCondition>
<mods:typeOfResource>text</mods:typeOfResource>
</mods:mods>
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
<mets:amdSec ID="AMD">
<mets:rightsMD ID="RIGHTS">
<mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="DVRIGHTS">
<mets:xmlData>
<dv:rights xmlns:dv="http://dfg-viewer.de/">
<dv:owner>Staatsbibliothek zu Berlin - Preußischer Kulturbesitz</dv:owner>
<dv:ownerLogo>http://resolver.staatsbibliothek-berlin.de/SBB0000000100000000</dv:ownerLogo>
<dv:ownerSiteURL>http://www.staatsbibliothek-berlin.de</dv:ownerSiteURL>
<dv:ownerContact>mailto:info@sbb.spk-berlin.de</dv:ownerContact>
</dv:rights>
</mets:xmlData>
</mets:mdWrap>
</mets:rightsMD>
<mets:digiprovMD ID="DIGIPROV">
<mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="DVLINKS">
<mets:xmlData>
<dv:links xmlns:dv="http://dfg-viewer.de/">
<dv:reference>http://www.stabikat.de/DB=1/PPN?PPN=717884805 </dv:reference>
<dv:presentation>http://digital.staatsbibliothek-berlin.de/dms/werkansicht/?PPN=PPN717884805</dv:presentation>
<dv:iiif>https://content.staatsbibliothek-berlin.de/dc/PPN717884805/manifest</dv:iiif>
</dv:links>
</mets:xmlData>
</mets:mdWrap>
</mets:digiprovMD>
</mets:amdSec>
<mets:structMap TYPE="LOGICAL">
<mets:div ADMID="AMD" CONTENTIDS="http://resolver.staatsbibliothek-berlin.de/SBB00008D1E00000000" DMDID="DMDLOG_0000" ID="LOG_0000" LABEL="Die Predigt des Evangeliums in der Zeitenwende" ORDERLABEL="Predigt des Evangeliums in der Zeitenwende" TYPE="multivolume_work">
<mets:div ID="LOG_0001" LABEL="Altkirchliche Perikopen" ORDERLABEL="Altkirchliche Perikopen" TYPE="volume">
<mets:mptr xmlns:xlink="http://www.w3.org/1999/xlink" LOCTYPE="URL" xlink:href="http://digital.staatsbibliothek-berlin.de/dms/metsresolver/?PPN=PPN717885003"/>
</mets:div>
<mets:div ID="LOG_0002" TYPE="volume" LABEL="Eisenacher Perikopen Bd. 2" ORDER="20">
<mets:mptr xmlns:xlink="http://www.w3.org/1999/xlink" LOCTYPE="URL" xlink:href="http://digital.staatsbibliothek-berlin.de/dms/metsresolver/?PPN=PPN717885429"/>
</mets:div>
</mets:div>
</mets:structMap>
</mets:mets>

@ -35,3 +35,11 @@ def test_page_info():
# structMap. # structMap.
struct_types = sorted(removeprefix(k, "structMap-LOGICAL_TYPE_") for k, v in page_info_page.items() if k.startswith("structMap-LOGICAL_TYPE_") and v == 1) struct_types = sorted(removeprefix(k, "structMap-LOGICAL_TYPE_") for k, v in page_info_page.items() if k.startswith("structMap-LOGICAL_TYPE_") and v == 1)
assert struct_types == ["illustration", "monograph", "title_page"] assert struct_types == ["illustration", "monograph", "title_page"]
def test_page_info_multivolume_work():
    """Check that a multivolume_work METS file yields no page info.

    The fixture has a LOGICAL structMap whose top-level div is of type
    "multivolume_work" and no PHYSICAL structMap, so pages_to_dict() is
    expected to return an empty list rather than raise.
    """
    fixture_path = (
        TESTS_DATA_DIR
        / "mets-mods"
        / "PPN717884805-multivolume_work-no-structMap-PHYSICAL.xml"
    )
    parsed_mets = ET.parse(fixture_path)
    assert pages_to_dict(parsed_mets) == []

Loading…
Cancel
Save