You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
#!/bin/sh
#
# Fetch the METS file of one newspaper issue from the ZEFYS content server of
# the Staatsbibliothek zu Berlin into a fresh workspace directory, validate
# it with `ocrd workspace validate`, and hand the workspace over to the
# `run-docker-hub` helper located next to this script.
#
# Usage: <this script> [ZDB [YYYYMMDD]]
#   ZDB       identifier placed after "SNP" in the download URL
#             (presumably the newspaper's ZDB id -- confirm); default 27974534
#   YYYYMMDD  date of the issue, eight digits; default 19010712
#
# With no arguments the script processes the same issue it always did.

# -e: stop at the first failing command; -u: treat unset variables as errors.
set -eu

zdb=${1:-27974534}
yyyymmdd=${2:-19010712}

# The date ends up in a directory name and in a URL, so reject anything that
# is not exactly eight digits before touching the file system or the network.
case $yyyymmdd in
  [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]) ;;
  *)
    printf 'error: date must have the form YYYYMMDD, got: %s\n' "$yyyymmdd" >&2
    exit 2
    ;;
esac
|
|
|
|
|
|
|
|
# Absolute, symlink-resolved directory containing this script; used further
# down to locate the run-docker-hub helper after the working directory has
# changed. Everything is quoted so that a path with spaces or glob characters
# survives, and `--` keeps a path starting with '-' from being read as an
# option.
self_dir=$(dirname -- "$0")
self_dir=$(realpath -- "$self_dir")
|
|
|
|
|
|
|
|
|
|
|
|
# Every issue is processed in its own directory named "<zdb>-<yyyymmdd>",
# created relative to the directory the script was started from.
workspace="${zdb}-${yyyymmdd}"

# mkdir is called without -p on purpose: if the directory already exists the
# command fails and, because of `set -e`, the script stops here.
mkdir "${workspace}"
cd "${workspace}"

# Show where the following steps are going to write their files.
pwd
|
|
|
|
# Address of the issue's XML file on the Staatsbibliothek zu Berlin content
# server; it is stored locally as mets.xml.
zefys_url="https://content.staatsbibliothek-berlin.de/zefys/SNP${zdb}-${yyyymmdd}-0-0-0-0.xml"
printf '%s\n' "$zefys_url"

# --fail makes curl exit non-zero on an HTTP error response (e.g. 404 for an
# unknown issue) instead of saving the server's error page as mets.xml and
# reporting success; together with `set -e` the script then stops here.
curl --fail --output mets.xml "$zefys_url"
|
|
|
|
|
|
|
|
|
|
|
|
# Show the validation report without the "Won't download remote image"
# notices. The result of the validation itself is informational only: the
# status of a pipeline is that of its last command, i.e. grep.
#
# grep exits with 1 when it selects no line at all (the report was empty or
# consisted solely of the filtered notices). Under `set -e` that would abort
# the script although nothing went wrong, so status 1 is accepted here; a
# real grep failure (status 2 or higher) still stops the script.
ocrd workspace validate mets.xml \
  | grep -v "<notice>Won't download remote image" \
  || [ "$?" -eq 1 ]
|
|
|
|
|
|
|
|
|
|
|
|
# Start the run-docker-hub helper that sits in the same directory as this
# script. The path is quoted so that a script directory containing spaces is
# not split into several words.
# NOTE(review): `-I MAX` presumably selects the MAX file group as input and
# `--skip-validation` presumably disables the helper's own validation --
# confirm against run-docker-hub.
"$self_dir/run-docker-hub" -I MAX --skip-validation
|
|
|
|
|
|
|
|
|
|
|
|
# * TODO: Error on invocation
|
|
|
|
# * TODO: Check out options to get better image resolutions
|
|
|
|
# * TODO: Are input images already grayscale? Further binarization makes them
|
|
|
|
# worse than before
|
|
|
|
# * TODO: Does this lose the image URLs for the MAX filegroup?
|
|
|
|
# * TODO: Lots of text problems with ocrd_calamari "not the same as Calamari"
|