From 85ff80d548bbcf5ba43bba6e69cd3efaa69fd899 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 16 Aug 2019 14:04:36 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Use=20dinglehopper's=20new=20OCR-D?= =?UTF-8?q?=20interface?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 5 ----- my_ocrd_workflow | 13 ++----------- requirements.txt | 2 ++ 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index f3907a9..5e85272 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,10 +20,5 @@ RUN pip3 install --no-cache-dir -r /tmp/requirements.txt COPY my_ocrd_workflow /usr/bin COPY xsd /usr/bin/xsd -# XXX Hack -COPY ../experiments/ocr-eval /usr/local/ocr-eval -RUN pip3 install --no-cache-dir -r /usr/local/ocr-eval/requirements.txt - - WORKDIR /data CMD ["/usr/bin/my_ocrd_workflow"] diff --git a/my_ocrd_workflow b/my_ocrd_workflow index 486b8ad..0a96a75 100755 --- a/my_ocrd_workflow +++ b/my_ocrd_workflow @@ -155,17 +155,8 @@ do_validate if ocrd workspace list-group | grep -q OCR-D-GT-PAGE; then - for g in `ocrd workspace list-page`; do - echo "== $g" - gt=`ocrd workspace find -G OCR-D-GT-PAGE -g $g` - ocr=`ocrd workspace find -G OCR-D-OCR-TESS -g $g` - - if [ -n "$gt" ]; then - python3 /usr/local/ocr-eval/cli.py $gt $ocr - mv report.json report-$g.json - mv report.html report-$g.html - fi - done + remove_filegrp OCR-D-OCR-TESS-EVAL mets.xml + ocrd-dinglehopper -m mets.xml -I OCR-D-GT-PAGE,OCR-D-OCR-TESS -O OCR-D-OCR-TESS-EVAL fi # vim:tw=120: diff --git a/requirements.txt b/requirements.txt index 3d9f5e3..2f2bb31 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,5 @@ https://github.com/mikegerber/ocrd_kraken/archive/fix/pass-down-page-id.tar.gz tesserocr == 2.3.1 # 2.4.0 fails with Ubuntu 18.04's tesseract ocrd_tesserocr + +https://github.com/qurator-spk/dinglehopper/archive/0f056b9.tar.gz