ocrd-galley/tests/test-ocrd_cis.sh

31 lines
938 B
Bash
Raw Normal View History

2023-08-17 18:34:15 +02:00
#!/bin/sh
# Test script preamble: enable strict/traced execution and move into a fresh
# scratch directory under /tmp named after this script.
# -e: abort on the first failing command; -x: trace every command to stderr.
set -ex
# Basename of this script; used as the prefix of the scratch directory name.
test_id=$(basename -- "$0")
# Create the scratch directory in a separate assignment so that a failing
# mktemp aborts the script through `set -e`. When mktemp is expanded directly
# as the argument of `cd`, its failure is ignored and the resulting bare `cd`
# silently changes to $HOME instead.
workdir=$(mktemp -d "/tmp/$test_id-XXXXX")
cd "$workdir"
2023-08-17 18:34:15 +02:00
2024-05-06 23:40:55 +02:00
# File name of the ocropy model that is downloaded here and passed to
# ocrd-cis-ocropy-recognize further down.
OCRD_CIS_OCROPY_MODEL='fraktur.pyrnn.gz'
# Prepare processors: fetch the model with `ocrd resmgr download`
ocrd resmgr download ocrd-cis-ocropy-recognize "${OCRD_CIS_OCROPY_MODEL}"
2023-08-17 18:34:15 +02:00
# Prepare test workspace
2024-05-06 23:40:55 +02:00
# Download the example workspace archive, unpack it and change into the
# directory it contains (same name as the archive, minus ".zip").
sample_ws=actevedef_718448162.first-page
wget "https://qurator-data.de/examples/${sample_ws}.zip"
unzip "${sample_ws}.zip"
cd "${sample_ws}"
# XXX Binarize once more at this point: ocrd-cis-ocropy-segment wasn't happy
# with the already binarized input (no "binarized" AlternativeImage?!).
ocrd-skimage-binarize -I OCR-D-IMG -O OCR-D-IMG-BIN
2023-08-17 18:34:15 +02:00
# Run tests
2024-05-06 23:40:55 +02:00
# Run the segmentation processor at page level on the binarized images ...
ocrd-cis-ocropy-segment \
  -I OCR-D-IMG-BIN \
  -O TEST-CIS-OCROPY-SEG-LINE \
  -P level-of-operation page
# ... and require more than 50 output lines mentioning "TextLine" across the
# XML files it wrote. Under `set -e` a failed check aborts the script.
textline_hits=$(grep TextLine TEST-CIS-OCROPY-SEG-LINE/*.xml | wc -l)
[ "$textline_hits" -gt 50 ]
# Run recognition on the segmentation output using the configured model ...
ocrd-cis-ocropy-recognize \
  -I TEST-CIS-OCROPY-SEG-LINE \
  -O TEST-CIS-OCROPY-OCR \
  -P model "${OCRD_CIS_OCROPY_MODEL}"
# ... and require more than 50 output lines mentioning "Unicode" across the
# XML files it wrote. Under `set -e` a failed check aborts the script.
unicode_hits=$(grep Unicode TEST-CIS-OCROPY-OCR/*.xml | wc -l)
[ "$unicode_hits" -gt 50 ]