🚧 WIP: Migrate to using ocrd:all image - Move extra script to their own sub-directory

This commit is contained in:
Gerber, Mike 2024-04-25 20:29:29 +02:00
parent 70c0da0cec
commit 5dffd843aa
5 changed files with 0 additions and 0 deletions

31
zdb2ocr
View file

@ -1,31 +0,0 @@
#!/bin/sh
zdb=27974534
yyyymmdd=19010712
set -e
self_dir=`dirname $0`
self_dir=`realpath $self_dir`
workspace=$zdb-$yyyymmdd
mkdir "$workspace"
cd "$workspace"
pwd
zefys_url="https://content.staatsbibliothek-berlin.de/zefys/SNP$zdb-$yyyymmdd-0-0-0-0.xml"
echo "$zefys_url"
curl "$zefys_url" > mets.xml
ocrd workspace validate mets.xml | grep -v "<notice>Won't download remote image"
$self_dir/run-docker-hub -I MAX --skip-validation
# * TODO: Error on invocation
# * TODO: Check out options to get better image resolutions
# * TODO: Are input images already grayscale? Further binarization makes them
# worse than before
# * TODO: Does this loose the image URLs for the MAX filegroup?
# * TODO: Lots of text problems with ocrd_calamari "not the same as Calamari"