From f893b339c56f505f21034f540ef2b66f2a81bc89 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Wed, 3 Jun 2020 10:10:54 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20ppn2ocr:=20Properly=20remove=20t?=
 =?UTF-8?q?he=20PRESENTATION=20file=20group?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (free-text area between the "---" marker and the diffstat;
not part of the commit message and not part of the diff).

Subject, decoded: "🚧 ppn2ocr: Properly remove the PRESENTATION file group"

What the patch does:
  * Adds remove_file_grp(mets, use). For every mets:file ID found inside a
    mets:fileGrp whose USE attribute equals `use`, it removes each mets:fptr
    whose FILEID equals that ID. Afterwards it removes the matching
    mets:fileGrp elements themselves.
  * make_workspace() now calls remove_file_grp(mets, 'PRESENTATION') instead
    of removing only the mets:fileGrp elements, so the mets:fptr elements
    that point at the removed files are removed as well.

Reconstruction caveats:
  * This text was found collapsed onto a single line. Line breaks, blank
    lines and indentation below were restored from the hunk line counts and
    from Python syntax. The 4-space indent width is an assumption -- TODO
    confirm against the ppn2ocr file.
  * The last hunk header (-80,8 +88,7) needs three trailing context lines,
    but only one non-blank line is visible. Two blank context lines were
    assumed in front of it; alternatively a line following
    "# Duplicate DEFAULT ..." may have been cut off -- TODO confirm.
  * No address is present in the From: header, and nothing follows the last
    hunk in this view.

Points to check:
  * `use` and the file IDs are interpolated into the XPath expressions
    without escaping; a value containing a double quote would break the
    expression. Presumably only fixed values such as 'PRESENTATION' are
    passed -- verify against the callers.
  * One document-wide XPath query is issued per file ID.

 ppn2ocr | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/ppn2ocr b/ppn2ocr
index 766b6d6..109f07f 100755
--- a/ppn2ocr
+++ b/ppn2ocr
@@ -70,6 +70,14 @@ def iiif_url_for_dms_url(dms_url, ppn, size):
     return iiif_url
 
 
+def remove_file_grp(mets, use):
+    for bad_fileid in mets.xpath(f'//mets:fileGrp[@USE="{use}"]/mets:file/@ID', namespaces=XMLNS):
+        for bad in mets.xpath(f'//mets:fptr[@FILEID="{bad_fileid}"]', namespaces=XMLNS):
+            bad.getparent().remove(bad)
+    for bad in mets.xpath(f'//mets:fileGrp[@USE="{use}"]', namespaces=XMLNS):
+        bad.getparent().remove(bad)
+
+
 def make_workspace(ppn, workspace):
     # Make workspace directory
     os.mkdir(workspace)
@@ -80,8 +88,7 @@ def make_workspace(ppn, workspace):
     # XXX
     # Delete PRESENTATION file group
     # (local file:/// links, not handled well by "ocrd workspace")
-    for bad in mets.xpath('//mets:fileGrp[@USE="PRESENTATION"]', namespaces=XMLNS):
-        bad.getparent().remove(bad)
+    remove_file_grp(mets, 'PRESENTATION')
 
 
     # Duplicate DEFAULT file group into a new file group BEST