From 6226618f400973818a8d6042d8a220604fbec2af Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Tue, 12 Dec 2023 12:34:24 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20mods4pandas:=20Handle=20multivol?= =?UTF-8?q?ume=5Fwork=20without=20structMap=20TYPE=3D'PHYSICAL'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/mods4pandas/mods4pandas.py | 6 +- ...multivolume_work-no-structMap-PHYSICAL.xml | 114 ++++++++++++++++++ qurator/mods4pandas/tests/test_page_info.py | 8 ++ 3 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 qurator/mods4pandas/tests/data/mets-mods/PPN717884805-multivolume_work-no-structMap-PHYSICAL.xml diff --git a/qurator/mods4pandas/mods4pandas.py b/qurator/mods4pandas/mods4pandas.py index 7d187e1..028b9ac 100755 --- a/qurator/mods4pandas/mods4pandas.py +++ b/qurator/mods4pandas/mods4pandas.py @@ -270,7 +270,11 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]: structMap_LOGICAL = mets.find('./mets:structMap[@TYPE="LOGICAL"]', ns) fileSec = mets.find('./mets:fileSec', ns) if structMap_PHYSICAL is None: - raise ValueError("No structMap[@TYPE='PHYSICAL'] found") + # This is expected in a multivolume work! + if structMap_LOGICAL.find('./mets:div[@TYPE="multivolume_work"]', ns) is not None: + return [] + else: + raise ValueError("No structMap[@TYPE='PHYSICAL'] found (but not a multivolume work)") if structMap_LOGICAL is None: raise ValueError("No structMap[@TYPE='LOGICAL'] found") if fileSec is None: diff --git a/qurator/mods4pandas/tests/data/mets-mods/PPN717884805-multivolume_work-no-structMap-PHYSICAL.xml b/qurator/mods4pandas/tests/data/mets-mods/PPN717884805-multivolume_work-no-structMap-PHYSICAL.xml new file mode 100644 index 0000000..3ed745c --- /dev/null +++ b/qurator/mods4pandas/tests/data/mets-mods/PPN717884805-multivolume_work-no-structMap-PHYSICAL.xml @@ -0,0 +1,114 @@ + + + + + Goobi - UGH-1.11.1-v1.11.0-11-gbafb11b - 16−November−2015 + Goobi + + + + + + + + + Herborn + + Buchhandlung des Nassauischen Colportagevereins + 1916 + + + + Berlin + + Staatsbibliothek zu Berlin - Preußischer Kulturbesitz, Germany + [Electronic ed.] + + Krieg 1914-1918 + Historische Drucke + + PPN717884805 + + http://resolver.staatsbibliothek-berlin.de/SBB00008D1E00000000 + + + PPN242046452 + + + + Die Predigt des Evangeliums in der Zeitenwende + Erläuterungen und Dispositionen zu den altkirchlichen und den Eisenacher Perikopen und zu freien Texten unter besonderer Berücksichtigung der Kriegszeit + + P_Drucke_Europeana1914-1918 + + book + + Weltkr. 625 + + ger + + + + Europeana Collections 1914-1918 + + + + + aut + + Dunkmann + Karl + Dunkmann, Karl + + + reformatted digital + + + 217 + + + sh2010119545 + sh2008113843 + + UNKNOWN + text + + + + + + + + + + Staatsbibliothek zu Berlin - Preußischer Kulturbesitz + http://resolver.staatsbibliothek-berlin.de/SBB0000000100000000 + http://www.staatsbibliothek-berlin.de + mailto:info@sbb.spk-berlin.de + + + + + + + + + http://www.stabikat.de/DB=1/PPN?PPN=717884805 + http://digital.staatsbibliothek-berlin.de/dms/werkansicht/?PPN=PPN717884805 + https://content.staatsbibliothek-berlin.de/dc/PPN717884805/manifest + + + + + + + + + + + + + + + + diff --git a/qurator/mods4pandas/tests/test_page_info.py b/qurator/mods4pandas/tests/test_page_info.py index a740ffd..87eeac7 100644 --- a/qurator/mods4pandas/tests/test_page_info.py +++ b/qurator/mods4pandas/tests/test_page_info.py @@ -35,3 +35,11 @@ def test_page_info(): # structMap. struct_types = sorted(removeprefix(k, "structMap-LOGICAL_TYPE_") for k, v in page_info_page.items() if k.startswith("structMap-LOGICAL_TYPE_") and v == 1) assert struct_types == ["illustration", "monograph", "title_page"] + + +def test_page_info_multivolume_work(): + """Test creation of page_info for multivolume_work""" + mets = ET.parse(TESTS_DATA_DIR / "mets-mods" / "PPN717884805-multivolume_work-no-structMap-PHYSICAL.xml") + page_info = pages_to_dict(mets) + assert page_info == [] +