mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-07-03 02:19:53 +02:00
🐛 Fix/complete ocrd_cis support
This commit is contained in:
parent
e702939a44
commit
956de7492f
2 changed files with 21 additions and 24 deletions
|
@ -1,19 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG OCRD_CIS_VERSION="0.1.5"
|
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
# Now the real stuff:
|
|
||||||
"https://github.com/cisocrgroup/ocrd_cis/archive/v${OCRD_CIS_VERSION}.tar.gz"
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ["ocrd-cis-ocropy-segment"]
|
|
|
@ -4,11 +4,27 @@ set -ex
|
||||||
test_id=`basename $0`
|
test_id=`basename $0`
|
||||||
cd `mktemp -d /tmp/$test_id-XXXXX`
|
cd `mktemp -d /tmp/$test_id-XXXXX`
|
||||||
|
|
||||||
|
OCRD_CIS_OCROPY_MODEL=fraktur.pyrnn.gz
|
||||||
|
|
||||||
|
# Prepare processors
|
||||||
|
ocrd resmgr download ocrd-cis-ocropy-recognize $OCRD_CIS_OCROPY_MODEL
|
||||||
|
|
||||||
# Prepare test workspace
|
# Prepare test workspace
|
||||||
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
|
wget https://qurator-data.de/examples/actevedef_718448162.first-page.zip
|
||||||
unzip actevedef_718448162.first-page+binarization+segmentation.zip
|
unzip actevedef_718448162.first-page.zip
|
||||||
cd actevedef_718448162.first-page+binarization+segmentation
|
cd actevedef_718448162.first-page
|
||||||
|
|
||||||
|
# XXX ocrd-cis-ocropy-segment wasn't happy with the binarized input (no
|
||||||
|
# "binarized" AlternativeImage?!), so we do it here again
|
||||||
|
ocrd-skimage-binarize -I OCR-D-IMG -O OCR-D-IMG-BIN
|
||||||
|
|
||||||
# Run tests
|
# Run tests
|
||||||
ocrd-cis-ocropy-segment -I OCR-D-IMG-BIN -O TEST-CIS-OCRPY-SEGMENT
|
ocrd-cis-ocropy-segment \
|
||||||
# TODO -recognize
|
-I OCR-D-IMG-BIN -O TEST-CIS-OCROPY-SEG-LINE \
|
||||||
|
-P level-of-operation page
|
||||||
|
test "$(grep TextLine TEST-CIS-OCROPY-SEG-LINE/*.xml | wc -l)" -gt 50
|
||||||
|
|
||||||
|
ocrd-cis-ocropy-recognize \
|
||||||
|
-I TEST-CIS-OCROPY-SEG-LINE -O TEST-CIS-OCROPY-OCR \
|
||||||
|
-P model $OCRD_CIS_OCROPY_MODEL
|
||||||
|
test "$(grep Unicode TEST-CIS-OCROPY-OCR/*.xml | wc -l)" -gt 50
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue