From 44772f1923ba46196d3cfed863c4921405ac857b Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Mon, 5 Aug 2019 15:40:39 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20Work=20around=20problems=20with?=
 =?UTF-8?q?=20ocrd-tesserocr=20producing=20TextEquiv/@conf?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (this section is ignored by git-am):
* Line breaks and indentation below were reconstructed from a copy of this
  patch whose whitespace had been collapsed; two-space indentation is assumed.
  If a context line fails to match, apply with `git apply --ignore-whitespace`.
* The added function was revised in review: filegrp is now local, the
  expansions of $filegrp and $file are quoted, and $(...) replaces backticks.
  Hunk headers and diffstat reflect the revised content; the post-image blob
  id on the index line is still that of the original revision.

 my_ocrd_workflow | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/my_ocrd_workflow b/my_ocrd_workflow
index 43b7bb6..8f01175 100755
--- a/my_ocrd_workflow
+++ b/my_ocrd_workflow
@@ -106,8 +106,24 @@ page_fix_image_references() {
   done
 }
 
+page_workaround_remove_conf() {
+  # XXX Work around https://github.com/OCR-D/core/issues/269
+  #
+  # Delete the conf attribute from every PAGE TextEquiv element, in place, in
+  # all files that `ocrd workspace find` lists for the file group given as $1.
 
-do_fontident
+  local filegrp=$1
+
+  local file
+  for file in $(ocrd workspace find -G "$filegrp"); do
+    xmlstarlet ed --inplace \
+      -N 'page=http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15' \
+      -d '//page:TextEquiv/@conf' "$file"
+  done
+}
+
+
+# XXX do_fontident
 do_binarization
 do_validate
 
@@ -119,9 +135,9 @@ do_validate
 
 do_ocr
 page_validate_xml OCR-D-OCR-TESS
+page_workaround_remove_conf OCR-D-OCR-TESS
 do_validate
 
 page_fix_image_references OCR-D-OCR-TESS
 
-
 # vim:tw=120: