From c207859bcdb8fdef2a59cceaf541af9d101fddf2 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Wed, 19 Jun 2019 12:51:52 +0200
Subject: [PATCH] Refactor: Extract functions for the steps

---
Review notes (placed after the three-dash line, so they are not part of the
commit message):

* What the patch does: the font identification commands and the line
  segmentation commands that used to run at the top level of the script are
  wrapped in two functions, do_fontident() and do_linesegmentation(). Both
  functions are then called, in that order, at the end of the hunk, so the
  steps still run in the same sequence as before. The command text on the
  added lines is the same as on the removed lines.

* Both steps call remove_filegrp (defined above this hunk, see the hunk
  header) for their output fileGrp before running the processor.

* NOTE(review): ocrd_typegroups_classifier_parameters is assigned inside
  do_fontident() without "local", so it is still a global variable after the
  function returns. Consider "local" in a follow-up commit.

* NOTE(review): "echo $ocrd_typegroups_classifier_parameters" expands the
  variable unquoted, so the shell word-splits the JSON and echo joins the
  words with single spaces. The result is still the same JSON document, but
  quoting the expansion would be more robust.

* NOTE(review): the "network" parameter is an absolute path below /home/mike,
  so the script only works on a machine that has the model at that location.

* NOTE(review): "-p <(...)" is process substitution, which plain POSIX sh does
  not provide; presumably the script runs under bash (the shebang is outside
  this hunk) -- confirm.

* NOTE(review): this copy of the patch had lost its line breaks. Line
  structure and indentation were restored by hand; leading whitespace and the
  position of the blank context lines are a best guess and may not match the
  target file byte for byte. Verify against the original commit before
  applying.

 my_ocrd_workflow | 48 ++++++++++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/my_ocrd_workflow b/my_ocrd_workflow
index 828e210..f8e0d65 100755
--- a/my_ocrd_workflow
+++ b/my_ocrd_workflow
@@ -13,27 +13,35 @@ remove_filegrp() {
     -d "//mets:fileGrp[@USE='$filegrp_use']" $mets
 }
 
+do_fontident() {
+  ocrd_typegroups_classifier_parameters='
+  {
+    "network": "/home/mike/devel/OCR-D/monorepo/ocrd_typegroups_classifier/ocrd_typegroups_classifier/models/classifier.tgc",
+    "stride":143
+  }'
+  remove_filegrp OCR-D-FONTIDENT mets.xml
+  ocrd-typegroups-classifier -l DEBUG \
+    -m mets.xml -I OCR-D-IMG -O OCR-D-FONTIDENT \
+    -p <(echo $ocrd_typegroups_classifier_parameters)
+  # XXX does DEFAULT have any meaning? /buerger_gedichte_1778.ocrd does not have
+  # any DEFAULT, yet -I DEFAULT seems to work for ocrd-typegroups-classifier
+  #ocrd workspace validate mets.xml
+  # XXX Unspecified USE category 'FONTIDENT' in fileGrp 'OCR-D-FONTIDENT'
+  # XXX File 'OCR-D-FONTIDENT_OCR-D-IMG_0002' does not manifest any physical page.
+  # XXX Won't download remote image
+}
+
+do_linesegmentation() {
+  remove_filegrp OCR-D-SEG-LINE mets.xml
+  ocrd-tesserocr-segment-line -l DEBUG \
+    -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE
+  #ocrd workspace validate mets.xml
+}
+
+
+do_fontident
+do_linesegmentation
 
-ocrd_typegroups_classifier_parameters='
-{
-  "network": "/home/mike/devel/OCR-D/monorepo/ocrd_typegroups_classifier/ocrd_typegroups_classifier/models/classifier.tgc",
-  "stride":143
-}'
-remove_filegrp OCR-D-FONTIDENT mets.xml
-ocrd-typegroups-classifier -l DEBUG \
-  -m mets.xml -I OCR-D-IMG -O OCR-D-FONTIDENT \
-  -p <(echo $ocrd_typegroups_classifier_parameters)
-# XXX does DEFAULT have any meaning? /buerger_gedichte_1778.ocrd does not have
-# any DEFAULT, yet -I DEFAULT seems to work for ocrd-typegroups-classifier
-#ocrd workspace validate mets.xml
-# XXX Unspecified USE category 'FONTIDENT' in fileGrp 'OCR-D-FONTIDENT'
-# XXX File 'OCR-D-FONTIDENT_OCR-D-IMG_0002' does not manifest any physical page.
-# XXX Won't download remote image
-
-remove_filegrp OCR-D-SEG-LINE mets.xml
-ocrd-tesserocr-segment-line -l DEBUG \
-  -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE
-#ocrd workspace validate mets.xml
 
 
 # vim:tw=120: