⤴ Update ocrd_tesserocr to 0.17.0

Update ocrd_tesserocr and add a test script.

Fixes #72.
master
Gerber, Mike 1 year ago
parent f58c254250
commit f02b2973f9

@ -3,7 +3,7 @@ FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
# Build-time settings; each can be overridden with --build-arg.
# PIP_INSTALL is the pip command line used by the RUN steps of this file.
ARG PIP_INSTALL="pip install --no-cache-dir"
ARG TESSDATA_BEST_VERSION="4.0.0"
# ocrd_tesserocr release to install (bumped from 0.16.0).
ARG OCRD_TESSEROCR_VERSION="0.17.0"
# Exported into the image environment; key=value form replaces the deprecated
# space-separated ENV syntax.
ENV TESSDATA_PREFIX=/usr/local/share/tessdata
@ -19,9 +19,7 @@ RUN add-apt-repository ppa:alex-p/tesseract-ocr && \
# Build pip installable stuff
# Install the ocrd_tesserocr release selected by OCRD_TESSEROCR_VERSION instead
# of an unreleased pull-request merge ref, so the build is reproducible.
RUN ${PIP_INSTALL} \
    "ocrd_tesserocr == ${OCRD_TESSEROCR_VERSION}"
# Check pip dependencies
# `pip check` fails the build if installed packages have incompatible requirements.
RUN pip check

@ -27,3 +27,12 @@ issue should be open that reminds us to go back to a versioned release again.
Besides relying on "proper releases", this also serves a second purpose:
reviewing the releases of qurator-spk projects.
Test builds
-----------
XXX Review this
```
GIT_COMMIT=test ./build Dockerfile-core Dockerfile-ocrd_tesserocr
DOCKER_IMAGE_TAG=test ./test-ocrd_tesserocr.sh
```

@ -0,0 +1,17 @@
#!/bin/sh
# Runs the ocrd_tesserocr processors (region segmentation, line segmentation,
# recognition) one after another on a downloaded example workspace.
# Aborts on the first failing command (-e) and traces every command (-x).
set -ex

# Create the scratch directory in its own assignment: if mktemp fails, the
# assignment fails and `set -e` stops the script. Passing the command
# substitution directly to `cd` would instead expand to a bare `cd`, which
# changes to $HOME and lets the rest of the script run there.
workdir=$(mktemp -d /tmp/test-ocrd_tesserocr-XXXXX)
cd "$workdir"

# Prepare processors
ocrd resmgr download ocrd-tesserocr-recognize Fraktur_GT4HistOCR.traineddata

# Prepare test workspace
# The archive unpacks into a directory named like the archive without ".zip".
workspace="actevedef_718448162.first-page+binarization+segmentation"
wget "https://qurator-data.de/examples/${workspace}.zip"
unzip "${workspace}.zip"
cd "$workspace"

# Run tests
# Each step reads the file group written by the previous one.
ocrd-tesserocr-segment-region -I OCR-D-IMG-BIN -O TEST-TESS-SEG-REG
ocrd-tesserocr-segment-line -I TEST-TESS-SEG-REG -O TEST-TESS-SEG-LINE
ocrd-tesserocr-recognize -I TEST-TESS-SEG-LINE -O TEST-TESS-OCR -P model Fraktur_GT4HistOCR
Loading…
Cancel
Save