diff --git a/my_ocrd_workflow b/my_ocrd_workflow index b806b97..43b7bb6 100755 --- a/my_ocrd_workflow +++ b/my_ocrd_workflow @@ -81,21 +81,6 @@ do_ocr() { -p <(echo $ocrd_tesserocr_recognize_parameters) } -page_fix_xml() { - # Fix the PAGE XML generated by OCR-D core - # - # XXX core does not produce valid XML (See https://github.com/OCR-D/core/issues/242), fix it by setting the correct - # PAGE XML version. This makes PAGE Viewer open the file. - - filegrp=$1 - - local file - for file in `ocrd workspace find -G $filegrp`; do - sed -i 's#pagecontent/2017-07-15#pagecontent/2019-07-15#g' $file - sed -i 's#pagecontent/2018-07-15#pagecontent/2019-07-15#g' $file - done -} - page_validate_xml() { # Validate all PAGE XML against the XML schema @@ -133,9 +118,9 @@ page_validate_xml OCR-D-SEG-LINE do_validate do_ocr +page_validate_xml OCR-D-OCR-TESS do_validate -page_fix_xml OCR-D-OCR-TESS # XXX is it necessary anymore? page_fix_image_references OCR-D-OCR-TESS