From 9303f4b4dfa755e117f8d627eef54a2cad70f1c0 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Fri, 22 May 2020 13:43:11 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20zdb2ocr:=20Produce=20OCR=20of=20?=
 =?UTF-8?q?ZEFYS=20newspapers=20(WIP)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note: the script below was revised after this patch was generated, so
the blob id on the index line is stale. Regenerate the patch before relying
on it for a three-way merge.

 zdb2ocr | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100755 zdb2ocr

diff --git a/zdb2ocr b/zdb2ocr
new file mode 100755
index 0000000..1091cd4
--- /dev/null
+++ b/zdb2ocr
@@ -0,0 +1,35 @@
+#!/bin/sh
+# Download the ZEFYS METS file selected by the zdb and yyyymmdd values below
+# into a new workspace directory, print its validation report and then run
+# the run-docker-hub script that sits next to this file.
+zdb=27974534
+yyyymmdd=19010712
+
+# Abort on the first failing command.
+set -e
+
+# Absolute directory of this script, used to locate run-docker-hub.
+self_dir=$(dirname "$0")
+self_dir=$(realpath "$self_dir")
+
+
+# mkdir without -p fails when the workspace already exists, which together
+# with set -e stops the script before an existing workspace is touched.
+workspace=$zdb-$yyyymmdd
+mkdir "$workspace"
+cd "$workspace"
+pwd
+zefys_url="https://content.staatsbibliothek-berlin.de/zefys/SNP$zdb-$yyyymmdd-0-0-0-0.xml"
+echo "$zefys_url"
+# --fail turns an HTTP error into a non-zero exit, so the script stops
+# instead of saving an error page as mets.xml.
+curl --fail "$zefys_url" > mets.xml
+
+
+# Show the validation report minus the remote image notices. grep -v exits
+# with status 1 when it filters out every line, so the status is discarded
+# to keep set -e from aborting on a report that has nothing else to show.
+ocrd workspace validate mets.xml | grep -v "Won't download remote image" || true
+
+
+"$self_dir/run-docker-hub" -I MAX --skip-validation