From 91296ffa0e09dd1e21b36ef1c1df09d248e3445b Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Wed, 15 Sep 2021 17:12:11 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9A=99=EF=B8=8F=20ppn2ocr:=20Move=20pruning?= =?UTF-8?q?=20file=20groups=20into=20a=20function?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ppn2ocr | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/ppn2ocr b/ppn2ocr index d880a08..a152acc 100755 --- a/ppn2ocr +++ b/ppn2ocr @@ -78,6 +78,14 @@ def mime_type_for_format(format_): return mime_type +def prune_file_grps(mets): + # XXX + # Delete PRESENTATION + LOCAL file groups + # (local file:/// or file:/ links, not handled well by "ocrd workspace") + remove_file_grp(mets, 'PRESENTATION') + remove_file_grp(mets, 'LOCAL') + + def make_workspace(ppn, workspace): # Make workspace directory os.mkdir(workspace) @@ -85,11 +93,6 @@ def make_workspace(ppn, workspace): mets = oai_mets(ppn) - # XXX - # Delete PRESENTATION + LOCAL file groups - # (local file:/// or file:/ links, not handled well by "ocrd workspace") - remove_file_grp(mets, 'PRESENTATION') - remove_file_grp(mets, 'LOCAL') # Delete MAX file group - we assume that, if it exists, it is not as @@ -125,6 +128,10 @@ def make_workspace(ppn, workspace): mets.find('//mets:fileSec', namespaces=XMLNS).append(file_grp_best) + + prune_file_grps(mets) + + # Write mets.xml mets.write('mets.xml', pretty_print=True)