From b468d688f26398bd5c7857e072a243c28face7e6 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 7 Feb 2020 12:27:42 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B9=20Remove=20font=20identification?= =?UTF-8?q?=20for=20now?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- my_ocrd_workflow | 23 ----------------------- requirements.txt | 2 -- 2 files changed, 25 deletions(-) diff --git a/my_ocrd_workflow b/my_ocrd_workflow index 89b16f6..09a8e5d 100755 --- a/my_ocrd_workflow +++ b/my_ocrd_workflow @@ -43,25 +43,6 @@ do_binarization() { -p '{"impl": "sauvola-ms-split"}' } -do_fontident() { - # Identify fonts in the images - - network=`python3 -c "import ocrd_typegroups_classifier, os; print(os.path.join(os.path.dirname(ocrd_typegroups_classifier.__file__), 'models', 'classifier.tgc'))"` - ocrd_typegroups_classifier_parameters=" - { - \"network\": \"$network\", - \"stride\": 143 - }" - - remove_filegrp OCR-D-OCR-FONTIDENT mets.xml - ocrd-typegroups-classifier -l $LOG_LEVEL \ - -m mets.xml -I OCR-D-IMG -O OCR-D-OCR-FONTIDENT \ - -p "$ocrd_typegroups_classifier_parameters" - # XXX Check if ocrd-typegroups-classifier uses the whole image - # XXX does DEFAULT have any meaning? /buerger_gedichte_1778.ocrd does not have - # any DEFAULT, yet -I DEFAULT seems to work for ocrd-typegroups-classifier -} - do_linesegmentation_tesserocr() { # Segment the lines in the binarized images @@ -145,10 +126,6 @@ page_upgrade_to_2019() { pip3 list -#do_fontident -#do_validate - - do_binarization do_validate diff --git a/requirements.txt b/requirements.txt index 90907ef..a87ead3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,6 @@ tensorflow-gpu < 2.0 # Needed for sbb_text_linedetector ocrd >= 2.0.0 -https://github.com/mikegerber/ocrd_typegroups_classifier/archive/fix/pass-down-page-id.tar.gz # XXX git+https://github.com/seuretm/ocrd_typegroups_classifier.git - https://github.com/mikegerber/ocrd_kraken/archive/fix/pass-down-page-id.tar.gz # XXX ocrd_kraken >= 0.1.1 ocrd_tesserocr