diff --git a/Dockerfile-ocrd_tesserocr b/Dockerfile-ocrd_tesserocr
index f6004ce..58bc9b3 100644
--- a/Dockerfile-ocrd_tesserocr
+++ b/Dockerfile-ocrd_tesserocr
@@ -3,7 +3,7 @@ FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
 
 ARG PIP_INSTALL="pip install --no-cache-dir"
 ARG TESSDATA_BEST_VERSION="4.0.0"
-#XXX ARG OCRD_TESSEROCR_VERSION="0.16.0"
+ARG OCRD_TESSEROCR_VERSION="0.17.0"
 
 ENV TESSDATA_PREFIX /usr/local/share/tessdata
 
@@ -19,9 +19,7 @@ RUN add-apt-repository ppa:alex-p/tesseract-ocr && \
 
 # Build pip installable stuff
 RUN ${PIP_INSTALL} \
-# "ocrd_tesserocr == ${OCRD_TESSEROCR_VERSION}"
-# XXX use official release again
-    git+https://github.com/OCR-D/ocrd_tesserocr.git@refs/pull/191/merge
+    "ocrd_tesserocr == ${OCRD_TESSEROCR_VERSION}"
 
 # Check pip dependencies
 RUN pip check
diff --git a/README-DEV.md b/README-DEV.md
index fc78890..234e61b 100644
--- a/README-DEV.md
+++ b/README-DEV.md
@@ -27,3 +27,12 @@ issue should be open that reminds us to go back to a versioned release again.
 
 Other than relying on "proper releases", this also has a second purpose: Review
 releases of qurator-spk releases.
+
+
+Test builds
+-----------
+XXX Review this
+```
+GIT_COMMIT=test ./build Dockerfile-core Dockerfile-ocrd_tesserocr
+DOCKER_IMAGE_TAG=test ./test-ocrd_tesserocr.sh
+```
diff --git a/test-ocrd_tesserocr.sh b/test-ocrd_tesserocr.sh
new file mode 100755
index 0000000..01dc89b
--- /dev/null
+++ b/test-ocrd_tesserocr.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+# Smoke test for the ocrd_tesserocr processors: downloads an example
+# workspace and runs the segment-region, segment-line and recognize steps.
+set -ex
+
+# Work in a fresh scratch directory. Assigning first makes a failing mktemp
+# abort the script via `set -e`; an empty unquoted operand would cd to $HOME.
+workdir=$(mktemp -d /tmp/test-ocrd_tesserocr-XXXXX)
+cd "$workdir"
+
+# Prepare processors
+ocrd resmgr download ocrd-tesserocr-recognize Fraktur_GT4HistOCR.traineddata
+
+# Prepare test workspace
+wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
+unzip actevedef_718448162.first-page+binarization+segmentation.zip
+cd actevedef_718448162.first-page+binarization+segmentation
+
+# Run tests
+ocrd-tesserocr-segment-region -I OCR-D-IMG-BIN -O TEST-TESS-SEG-REG
+ocrd-tesserocr-segment-line -I TEST-TESS-SEG-REG -O TEST-TESS-SEG-LINE
+ocrd-tesserocr-recognize -I TEST-TESS-SEG-LINE -O TEST-TESS-OCR -P model Fraktur_GT4HistOCR