From b8a287258240fb149b9976667ce2b0170bbb2519 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Tue, 12 Dec 2023 13:13:23 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20mods4pandas:=20Handle=20periodic?=
 =?UTF-8?q?al=20without=20structMap=20TYPE=3D'PHYSICAL'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 qurator/mods4pandas/mods4pandas.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/qurator/mods4pandas/mods4pandas.py b/qurator/mods4pandas/mods4pandas.py
index 028b9ac..75f2caf 100755
--- a/qurator/mods4pandas/mods4pandas.py
+++ b/qurator/mods4pandas/mods4pandas.py
@@ -270,8 +270,11 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
     structMap_LOGICAL = mets.find('./mets:structMap[@TYPE="LOGICAL"]', ns)
     fileSec = mets.find('./mets:fileSec', ns)
     if structMap_PHYSICAL is None:
-        # This is expected in a multivolume work!
-        if structMap_LOGICAL.find('./mets:div[@TYPE="multivolume_work"]', ns) is not None:
+        # This is expected in a multivolume work or periodical!
+        if any(
+            structMap_LOGICAL.find(f'./mets:div[@TYPE="{t}"]', ns) is not None
+            for t in ["multivolume_work", "periodical"]
+        ):
             return []
         else:
             raise ValueError("No structMap[@TYPE='PHYSICAL'] found (but not a multivolume work)")