From ddda6e48bc29f6c1d17d5c8990a0a444e9956bc7 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Wed, 26 Jun 2019 16:26:13 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Add=20my=20collection=20of=20OCR?=
 =?UTF-8?q?-D=20bug=20reproducers?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../bug-lohenstein-ocrd-typegroups-classifier.sh    | 21 +++++++++++++++++++++
 ocrd-bugs/bug-ocropy-segment-breakage.sh            | 15 +++++++++++++++
 ocrd-bugs/bug-remote-image.sh                       | 10 ++++++++++
 .../bug-tesserocr-segment-line-no-description.sh    |  5 +++++
 .../bug-tesserocr-segment-line-should-give-error.sh | 11 +++++++++++
 5 files changed, 62 insertions(+)
 create mode 100644 ocrd-bugs/bug-lohenstein-ocrd-typegroups-classifier.sh
 create mode 100644 ocrd-bugs/bug-ocropy-segment-breakage.sh
 create mode 100644 ocrd-bugs/bug-remote-image.sh
 create mode 100644 ocrd-bugs/bug-tesserocr-segment-line-no-description.sh
 create mode 100644 ocrd-bugs/bug-tesserocr-segment-line-should-give-error.sh

diff --git a/ocrd-bugs/bug-lohenstein-ocrd-typegroups-classifier.sh b/ocrd-bugs/bug-lohenstein-ocrd-typegroups-classifier.sh
new file mode 100644
index 0000000..d7fb9ab
--- /dev/null
+++ b/ocrd-bugs/bug-lohenstein-ocrd-typegroups-classifier.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Reproducer: run ocrd-typegroups-classifier with DEBUG logging on the
+# lohenstein_agrippina_1665 OCR-D zip, inside a fresh temporary directory.
+# NOTE: the "network" model path below is an absolute path under /home/mike;
+# adjust it for your own machine.
+# Trace via set -x: a shebang flag is ignored when run as "bash <script>".
+set -x
+workdir=$(mktemp -d) || exit 1
+cd "$workdir" || exit 1
+wget https://ocr-d-repo.scc.kit.edu/api/v1/dataresources/8d8aa287-94ca-48e3-84a8-1ee602871550/data/lohenstein_agrippina_1665.ocrd.zip
+dtrx lohenstein_agrippina_1665.ocrd.zip
+cd lohenstein_agrippina_1665.ocrd/data || exit 1
+ocrd_typegroups_classifier_parameters='
+  {
+    "network": "/home/mike/devel/OCR-D/monorepo/ocrd_typegroups_classifier/ocrd_typegroups_classifier/models/classifier.tgc",
+    "stride":143
+  }'
+# Pass the JSON to -p via process substitution; quoting keeps it verbatim.
+ocrd-typegroups-classifier -l DEBUG \
+  -m mets.xml -I OCR-D-IMG -O OCR-D-FONTIDENT \
+  -p <(printf '%s\n' "$ocrd_typegroups_classifier_parameters")
diff --git a/ocrd-bugs/bug-ocropy-segment-breakage.sh b/ocrd-bugs/bug-ocropy-segment-breakage.sh
new file mode 100644
index 0000000..18da71d
--- /dev/null
+++ b/ocrd-bugs/bug-ocropy-segment-breakage.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Reproducer: run ocrd-ocropy-segment over several OCR-D zips and print only
+# the last line of each run's combined stdout/stderr.
+# zips all from https://ocr-d-repo.scc.kit.edu/api/v1/metastore/bagit
+for z in benner_herrnhuterey04_1748.ocrd.zip buerger_gedichte_1778.ocrd.zip estor_rechtsgelehrsamkeit02_1758.ocrd.zip lohenstein_agrippina_1665.ocrd.zip silesius_seelenlust01_1657.ocrd.zip; do
+  echo "== $z"
+  # Each zip gets its own scratch directory; skip the zip if setup fails.
+  workdir=$(mktemp -d) || continue
+  cd "$workdir" || continue
+  cp "/srv/data/OCR-D/$z" .
+  dtrx "$z"
+  cd "${z%.zip}/data" || continue
+
+  ocrd-ocropy-segment -l DEBUG -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE 2>&1 | tail -n 1
+done
diff --git a/ocrd-bugs/bug-remote-image.sh b/ocrd-bugs/bug-remote-image.sh
new file mode 100644
index 0000000..6a3fd37
--- /dev/null
+++ b/ocrd-bugs/bug-remote-image.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Reproducer: download the buerger_gedichte_1778 OCR-D zip into a fresh
+# temporary directory and run "ocrd workspace validate" on its mets.xml.
+set -x
+workdir=$(mktemp -d) || exit 1
+cd "$workdir" || exit 1
+wget -q https://ocr-d-repo.scc.kit.edu/api/v1/dataresources/f15fb8c8-3842-4314-9a44-5e8b472d7bfc/data/buerger_gedichte_1778.ocrd.zip
+dtrx buerger_gedichte_1778.ocrd.zip
+cd buerger_gedichte_1778.ocrd/data || exit 1
+ocrd workspace validate mets.xml
diff --git a/ocrd-bugs/bug-tesserocr-segment-line-no-description.sh b/ocrd-bugs/bug-tesserocr-segment-line-no-description.sh
new file mode 100644
index 0000000..c45a90b
--- /dev/null
+++ b/ocrd-bugs/bug-tesserocr-segment-line-no-description.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Reproducer: print the --help output of ocrd-tesserocr-segment-line.
+# Trace via set -x: a shebang flag is ignored when run as "bash <script>".
+set -x
+ocrd-tesserocr-segment-line --help
diff --git a/ocrd-bugs/bug-tesserocr-segment-line-should-give-error.sh b/ocrd-bugs/bug-tesserocr-segment-line-should-give-error.sh
new file mode 100644
index 0000000..a5b916b
--- /dev/null
+++ b/ocrd-bugs/bug-tesserocr-segment-line-should-give-error.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Reproducer: run ocrd-tesserocr-segment-line with "-I DOES-NOT-EXIST" on the
+# lohenstein_agrippina_1665 OCR-D zip, inside a fresh temporary directory.
+# Trace via set -x: a shebang flag is ignored when run as "bash <script>".
+set -x
+workdir=$(mktemp -d) || exit 1
+cd "$workdir" || exit 1
+wget https://ocr-d-repo.scc.kit.edu/api/v1/dataresources/8d8aa287-94ca-48e3-84a8-1ee602871550/data/lohenstein_agrippina_1665.ocrd.zip
+dtrx lohenstein_agrippina_1665.ocrd.zip
+cd lohenstein_agrippina_1665.ocrd/data || exit 1
+ocrd-tesserocr-segment-line -l DEBUG -m mets.xml -I DOES-NOT-EXIST -O OCR-D-SEG-REGION