mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-07-05 19:19:53 +02:00
⤴ Update ocrd_tesserocr to 0.17.0
Update ocrd_tesserocr and add a test script. Fixes #72.
This commit is contained in:
parent
f58c254250
commit
f02b2973f9
3 changed files with 28 additions and 4 deletions
|
@ -3,7 +3,7 @@ FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||||
ARG TESSDATA_BEST_VERSION="4.0.0"
|
ARG TESSDATA_BEST_VERSION="4.0.0"
|
||||||
#XXX ARG OCRD_TESSEROCR_VERSION="0.16.0"
|
ARG OCRD_TESSEROCR_VERSION="0.17.0"
|
||||||
ENV TESSDATA_PREFIX /usr/local/share/tessdata
|
ENV TESSDATA_PREFIX /usr/local/share/tessdata
|
||||||
|
|
||||||
|
|
||||||
|
@ -19,9 +19,7 @@ RUN add-apt-repository ppa:alex-p/tesseract-ocr && \
|
||||||
|
|
||||||
# Build pip installable stuff
|
# Build pip installable stuff
|
||||||
RUN ${PIP_INSTALL} \
|
RUN ${PIP_INSTALL} \
|
||||||
# "ocrd_tesserocr == ${OCRD_TESSEROCR_VERSION}"
|
"ocrd_tesserocr == ${OCRD_TESSEROCR_VERSION}"
|
||||||
# XXX use official release again
|
|
||||||
git+https://github.com/OCR-D/ocrd_tesserocr.git@refs/pull/191/merge
|
|
||||||
|
|
||||||
# Check pip dependencies
|
# Check pip dependencies
|
||||||
RUN pip check
|
RUN pip check
|
||||||
|
|
|
@ -27,3 +27,12 @@ issue should be open that reminds us to go back to a versioned release again.
|
||||||
|
|
||||||
Other than relying on "proper releases", this also has a second purpose: Review
|
Other than relying on "proper releases", this also has a second purpose: Review
|
||||||
of qurator-spk releases.
|
of qurator-spk releases.
|
||||||
|
|
||||||
|
|
||||||
|
Test builds
|
||||||
|
-----------
|
||||||
|
XXX Review this
|
||||||
|
```
|
||||||
|
GIT_COMMIT=test ./build Dockerfile-core Dockerfile-ocrd_tesserocr
|
||||||
|
DOCKER_IMAGE_TAG=test ./test-ocrd_tesserocr.sh
|
||||||
|
```
|
||||||
|
|
17
test-ocrd_tesserocr.sh
Executable file
17
test-ocrd_tesserocr.sh
Executable file
|
@ -0,0 +1,17 @@
|
||||||
|
#!/bin/sh
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
# Work in a fresh scratch directory. The mktemp result is captured first so
# that a failure aborts the script: with an inline substitution a failed
# mktemp expands to nothing and a bare `cd` would silently switch to $HOME.
tmpdir=$(mktemp -d /tmp/test-ocrd_tesserocr-XXXXX) || exit 1
cd "$tmpdir" || exit 1
|
||||||
|
|
||||||
|
# Prepare processors
|
||||||
|
ocrd resmgr download ocrd-tesserocr-recognize Fraktur_GT4HistOCR.traineddata
|
||||||
|
|
||||||
|
# Prepare test workspace
|
||||||
|
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
|
||||||
|
unzip actevedef_718448162.first-page+binarization+segmentation.zip
|
||||||
|
cd actevedef_718448162.first-page+binarization+segmentation
|
||||||
|
|
||||||
|
# Run tests
|
||||||
|
ocrd-tesserocr-segment-region -I OCR-D-IMG-BIN -O TEST-TESS-SEG-REG
|
||||||
|
ocrd-tesserocr-segment-line -I TEST-TESS-SEG-REG -O TEST-TESS-SEG-LINE
|
||||||
|
ocrd-tesserocr-recognize -I TEST-TESS-SEG-LINE -O TEST-TESS-OCR -P model Fraktur_GT4HistOCR
|
Loading…
Add table
Add a link
Reference in a new issue