mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-07-01 01:19:52 +02:00
⤴ Update ocrd_tesserocr to 0.17.0
Update ocrd_tesserocr and add a test script. Fixes #72.
This commit is contained in:
parent
f58c254250
commit
f02b2973f9
3 changed files with 28 additions and 4 deletions
|
@ -3,7 +3,7 @@ FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
|
|||
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
ARG TESSDATA_BEST_VERSION="4.0.0"
|
||||
#XXX ARG OCRD_TESSEROCR_VERSION="0.16.0"
|
||||
ARG OCRD_TESSEROCR_VERSION="0.17.0"
|
||||
# Persisted into the image environment (ENV, not ARG) so it is also set at
# container runtime; written in key=value form, as the space-separated
# `ENV key value` syntax is deprecated.
ENV TESSDATA_PREFIX=/usr/local/share/tessdata
|
||||
|
||||
|
||||
|
@ -19,9 +19,7 @@ RUN add-apt-repository ppa:alex-p/tesseract-ocr && \
|
|||
|
||||
# Build pip installable stuff
|
||||
RUN ${PIP_INSTALL} \
|
||||
# "ocrd_tesserocr == ${OCRD_TESSEROCR_VERSION}"
|
||||
# XXX use official release again
|
||||
git+https://github.com/OCR-D/ocrd_tesserocr.git@refs/pull/191/merge
|
||||
"ocrd_tesserocr == ${OCRD_TESSEROCR_VERSION}"
|
||||
|
||||
# Check pip dependencies
|
||||
RUN pip check
|
||||
|
|
|
@ -27,3 +27,12 @@ issue should be open that reminds us to go back to a versioned release again.
|
|||
|
||||
Other than relying on "proper releases", this also has a second purpose: Review
|
||||
of qurator-spk releases.
|
||||
|
||||
|
||||
Test builds
|
||||
-----------
|
||||
XXX Review this
|
||||
```
|
||||
GIT_COMMIT=test ./build Dockerfile-core Dockerfile-ocrd_tesserocr
|
||||
DOCKER_IMAGE_TAG=test ./test-ocrd_tesserocr.sh
|
||||
```
|
||||
|
|
17
test-ocrd_tesserocr.sh
Executable file
17
test-ocrd_tesserocr.sh
Executable file
|
@ -0,0 +1,17 @@
|
|||
#!/bin/sh
|
||||
# -e: abort the script as soon as any command fails;
# -x: echo every command before it runs, so a failing step is visible in the log.
set -ex
|
||||
|
||||
# Run everything inside a fresh scratch directory. Use $(...) instead of
# backticks and quote the result so the path is never word-split or globbed.
cd "$(mktemp -d /tmp/test-ocrd_tesserocr-XXXXX)"
|
||||
|
||||
# Prepare processors
|
||||
ocrd resmgr download ocrd-tesserocr-recognize Fraktur_GT4HistOCR.traineddata
|
||||
|
||||
# Prepare test workspace
|
||||
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
|
||||
unzip actevedef_718448162.first-page+binarization+segmentation.zip
|
||||
cd actevedef_718448162.first-page+binarization+segmentation
|
||||
|
||||
# Run tests
|
||||
ocrd-tesserocr-segment-region -I OCR-D-IMG-BIN -O TEST-TESS-SEG-REG
|
||||
ocrd-tesserocr-segment-line -I TEST-TESS-SEG-REG -O TEST-TESS-SEG-LINE
|
||||
ocrd-tesserocr-recognize -I TEST-TESS-SEG-LINE -O TEST-TESS-OCR -P model Fraktur_GT4HistOCR
|
Loading…
Add table
Add a link
Reference in a new issue