#!/bin/sh
#
# Fetch the METS file of one ZEFYS newspaper issue into a fresh workspace
# directory, validate it with `ocrd workspace validate`, and then hand the
# workspace to the sibling `run-docker-hub` script.
#
# The issue is selected by the two variables below; the workspace directory
# is created in the current working directory and named "<zdb>-<yyyymmdd>".

set -eu

# Identifier and date that select the issue in the ZEFYS URL.
# NOTE(review): zdb is presumably the ZDB id of the newspaper and yyyymmdd the
# issue date -- confirm against the ZEFYS URL scheme.
zdb=27974534
yyyymmdd=19010712

# Absolute directory of this script, used to locate run-docker-hub.
self_dir=$(dirname "$0")
self_dir=$(realpath "$self_dir")

# Plain mkdir (no -p) on purpose: an already existing workspace makes the
# script stop under `set -e` instead of overwriting earlier results.
workspace=$zdb-$yyyymmdd
mkdir "$workspace"
cd "$workspace"
pwd

zefys_url="https://content.staatsbibliothek-berlin.de/zefys/SNP$zdb-$yyyymmdd-0-0-0-0.xml"
printf '%s\n' "$zefys_url"

# --fail: make curl exit non-zero on HTTP errors instead of saving the
# server's error page as mets.xml.
curl --fail "$zefys_url" > mets.xml

# Hide the expected "Won't download remote image" warnings. grep exits with 1
# when it prints nothing, which would abort the script under `set -e` even
# though an empty report is fine; only a real grep error (status > 1) is fatal.
# The exit status of `ocrd` itself is not checked here (plain sh pipelines
# report the status of the last command only).
ocrd workspace validate mets.xml \
  | { grep -v "Won't download remote image" || [ "$?" -eq 1 ]; }

"$self_dir/run-docker-hub" -I MAX --skip-validation

# * TODO: Error on invocation
# * TODO: Check out options to get better image resolutions
# * TODO: Are input images already grayscale? Further binarization makes them
#   worse than before
# * TODO: Does this lose the image URLs for the MAX filegroup?
# * TODO: Lots of text problems with ocrd_calamari "not the same as Calamari"