From 8024064697902c14928a407945e522185c475a38 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 22 May 2020 16:09:00 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20ppn2ocr:=20Fix=20file:/=20links?= =?UTF-8?q?=20to=20use=20file:///,=20and=20remove=20unavailable=20LOCAL?= =?UTF-8?q?=20file=20group?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ppn2ocr | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ppn2ocr b/ppn2ocr index d9aa7ef..932985a 100755 --- a/ppn2ocr +++ b/ppn2ocr @@ -18,9 +18,14 @@ oai_url="https://digital.staatsbibliothek-berlin.de/oai?verb=GetRecord&metadataP echo "$oai_url" curl "$oai_url" | xmlstarlet sel -t -c '//*[local-name()="mets"]' > mets.xml +# Fix all 'file:/' URLs to 'file:///' +sed -i 's#file:/\([^/]\)#file:///\1#g' mets.xml +# Patch mets.xml to use our NFS mount sed -i 's#file:///goobi/tiff001/sbb/#file:///srv/digisam_images/sbb/#g' mets.xml +# Remove LOCAL file group as we do not have access to the files +ocrd workspace remove-group -rf --keep-files LOCAL ocrd workspace validate mets.xml | grep -v "Won't download remote image"