From 956de7492f9061e78d473260ef3f12ef50b8a577 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Mon, 6 May 2024 23:40:55 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix/complete=20ocrd=5Fcis=20supp?=
 =?UTF-8?q?ort?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---

Reviewer notes (this area, between the three-dash marker and the first
"diff --git" line, is not part of the commit message):

* What the patch does: deletes Dockerfile-ocrd_cis, and extends
  test-ocrd_cis.sh so that it downloads the fraktur.pyrnn.gz model via
  "ocrd resmgr", fetches the plain first-page example workspace,
  binarizes it with ocrd-skimage-binarize, then runs
  ocrd-cis-ocropy-segment and ocrd-cis-ocropy-recognize and requires
  more than 50 "TextLine" / "Unicode" matches in the respective output.
* NOTE(review): this copy was restored from a whitespace-collapsed
  version. The leading indentation of continuation lines (the URL line
  in the deleted Dockerfile, the "-I"/"-P" lines in the script) was not
  recoverable and has been re-created; verify the Dockerfile line
  against blob e967893 before applying, or apply with
  --ignore-whitespace.
* NOTE(review): the author address on the From: header is absent in
  this copy.
* NOTE(review): $OCRD_CIS_OCROPY_MODEL is expanded unquoted in the added
  lines; harmless for the current value, but worth quoting in a
  follow-up change.

 Dockerfile-ocrd_cis | 19 -------------------
 test-ocrd_cis.sh    | 26 +++++++++++++++++++++-----
 2 files changed, 21 insertions(+), 24 deletions(-)
 delete mode 100644 Dockerfile-ocrd_cis

diff --git a/Dockerfile-ocrd_cis b/Dockerfile-ocrd_cis
deleted file mode 100644
index e967893..0000000
--- a/Dockerfile-ocrd_cis
+++ /dev/null
@@ -1,19 +0,0 @@
-ARG GIT_COMMIT="latest"
-FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
-
-ARG PIP_INSTALL="pip install --no-cache-dir"
-ARG OCRD_CIS_VERSION="0.1.5"
-
-
-# Build pip installable stuff
-RUN ${PIP_INSTALL} \
-# Now the real stuff:
-    "https://github.com/cisocrgroup/ocrd_cis/archive/v${OCRD_CIS_VERSION}.tar.gz"
-
-
-# Check pip dependencies
-RUN pip check
-
-
-# Default command
-CMD ["ocrd-cis-ocropy-segment"]
diff --git a/test-ocrd_cis.sh b/test-ocrd_cis.sh
index 39afd91..f998df2 100755
--- a/test-ocrd_cis.sh
+++ b/test-ocrd_cis.sh
@@ -4,11 +4,27 @@ set -ex
 test_id=`basename $0`
 cd `mktemp -d /tmp/$test_id-XXXXX`
 
+OCRD_CIS_OCROPY_MODEL=fraktur.pyrnn.gz
+
+# Prepare processors
+ocrd resmgr download ocrd-cis-ocropy-recognize $OCRD_CIS_OCROPY_MODEL
+
 # Prepare test workspace
-wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
-unzip actevedef_718448162.first-page+binarization+segmentation.zip
-cd actevedef_718448162.first-page+binarization+segmentation
+wget https://qurator-data.de/examples/actevedef_718448162.first-page.zip
+unzip actevedef_718448162.first-page.zip
+cd actevedef_718448162.first-page
+
+# XXX ocrd-cis-ocropy-segment wasn't happy with the binarized input (no
+# "binarized" AlternativeImage?!), so we do it here again
+ocrd-skimage-binarize -I OCR-D-IMG -O OCR-D-IMG-BIN
 
 # Run tests
-ocrd-cis-ocropy-segment -I OCR-D-IMG-BIN -O TEST-CIS-OCRPY-SEGMENT
-# TODO -recognize
+ocrd-cis-ocropy-segment \
+  -I OCR-D-IMG-BIN -O TEST-CIS-OCROPY-SEG-LINE \
+  -P level-of-operation page
+test "$(grep TextLine TEST-CIS-OCROPY-SEG-LINE/*.xml | wc -l)" -gt 50
+
+ocrd-cis-ocropy-recognize \
+  -I TEST-CIS-OCROPY-SEG-LINE -O TEST-CIS-OCROPY-OCR \
+  -P model $OCRD_CIS_OCROPY_MODEL
+test "$(grep Unicode TEST-CIS-OCROPY-OCR/*.xml | wc -l)" -gt 50