From ed96a49321b0e8ebcec4264c705b102c61d592cc Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 25 Apr 2024 17:24:08 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20WIP:=20Migrate=20to=20using=20oc?= =?UTF-8?q?rd:all=20image=20-=20ocrd=5Fanybaseocr?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile-ocrd_anybaseocr | 19 ------------------- test-ocrd_anybaseocr.sh | 16 ++++++++++++++++ wrapper/qurator/ocrd_galley/sub_images.py | 1 + 3 files changed, 17 insertions(+), 19 deletions(-) delete mode 100644 Dockerfile-ocrd_anybaseocr create mode 100755 test-ocrd_anybaseocr.sh diff --git a/Dockerfile-ocrd_anybaseocr b/Dockerfile-ocrd_anybaseocr deleted file mode 100644 index 6ce5d0e..0000000 --- a/Dockerfile-ocrd_anybaseocr +++ /dev/null @@ -1,19 +0,0 @@ -ARG GIT_COMMIT="latest" -FROM quratorspk/ocrd-galley-core-cuda12.1:$GIT_COMMIT - -ARG PIP_INSTALL="pip install --no-cache-dir" -ARG OCRD_ANYBASEOCR_VERSION="1.8.2" - - - -# Build pip installable stuff -RUN ${PIP_INSTALL} \ - "ocrd_anybaseocr == ${OCRD_ANYBASEOCR_VERSION}" - - -# Check pip dependencies -RUN pip check - - -# Default command -CMD ["ocrd-anybaseocr-crop"] diff --git a/test-ocrd_anybaseocr.sh b/test-ocrd_anybaseocr.sh new file mode 100755 index 0000000..6d44615 --- /dev/null +++ b/test-ocrd_anybaseocr.sh @@ -0,0 +1,16 @@ +#!/bin/sh +set -ex + +test_id=`basename $0` +cd `mktemp -d /tmp/$test_id-XXXXX` + +# Prepare processors + +# Prepare test workspace +wget https://qurator-data.de/examples/actevedef_718448162.first-page.zip +unzip actevedef_718448162.first-page.zip +cd actevedef_718448162.first-page + +# Run tests +ocrd-anybaseocr-binarize -I OCR-D-IMG -O OCR-D-BIN -P operation_level page -P threshold 0.3 +ocrd-anybaseocr-deskew -I OCR-D-BIN -O OCR-D-DESKEW -P maxskew 5.0 -P skewsteps 20 -P operation_level page diff --git a/wrapper/qurator/ocrd_galley/sub_images.py b/wrapper/qurator/ocrd_galley/sub_images.py index f2f4ae1..e532e82 100644 --- a/wrapper/qurator/ocrd_galley/sub_images.py +++ b/wrapper/qurator/ocrd_galley/sub_images.py @@ -32,6 +32,7 @@ sub_images = { "ocrd-skimage-binarize": "ocrd_wrap", "ocrd-skimage-denoise": "ocrd_wrap", "ocrd-eynollah-segment": "eynollah", + "ocrd-anybaseocr-binarize": "ocrd_anybaseocr", "ocrd-anybaseocr-crop": "ocrd_anybaseocr", "ocrd-anybaseocr-deskew": "ocrd_anybaseocr", "ocrd-trocr-recognize": "ocrd_trocr",