From 9bd3853c78fc8870d51b4d84a9c0218e9d0f186c Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Wed, 19 Jun 2019 13:02:54 +0200
Subject: [PATCH] Add OCR step

---
 my_ocrd_workflow | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/my_ocrd_workflow b/my_ocrd_workflow
index 3e4484f..498446f 100755
--- a/my_ocrd_workflow
+++ b/my_ocrd_workflow
@@ -38,9 +38,21 @@ do_linesegmentation() {
   #ocrd workspace validate mets.xml
 }
 
+# OCR step: runs ocrd-tesserocr-recognize on mets.xml with -I OCR-D-SEG-LINE and -O OCR-D-OCR-TESS.
+do_ocr() {
+  ocrd_tesserocr_recognize_parameters='{ "model": "eng" }'  # TODO mods:language + fontident → model
+  # NOTE(review): presumably clears a previous OCR-D-OCR-TESS group so reruns start clean; confirm in remove_filegrp
+  remove_filegrp OCR-D-OCR-TESS mets.xml
+  # The expansion is quoted so the JSON reaches -p verbatim (no word splitting or globbing).
+  ocrd-tesserocr-recognize -l DEBUG \
+    -m mets.xml -I OCR-D-SEG-LINE -O OCR-D-OCR-TESS \
+    -p <(printf '%s\n' "$ocrd_tesserocr_recognize_parameters")
+  #ocrd workspace validate mets.xml
+}
 
 do_fontident
 do_linesegmentation
+do_ocr
 
 
 # XXX Multiple calls create multiple identical mets:agent elements