Validate PAGE XML after OCR

pull/27/head
Gerber, Mike 5 years ago
parent 0d7fd21446
commit 8b67866aac

@ -81,21 +81,6 @@ do_ocr() {
-p <(echo $ocrd_tesserocr_recognize_parameters)
}
page_fix_xml() {
  # Fix the PAGE XML generated by OCR-D core.
  #
  # XXX core does not produce valid XML (see
  # https://github.com/OCR-D/core/issues/242); fix it by setting the correct
  # PAGE XML version. This makes PAGE Viewer open the file.
  #
  # Arguments:
  #   $1 - file group passed to `ocrd workspace find -G`; every file listed
  #        for it is rewritten in place.
  # Effect:
  #   Replaces "pagecontent/2017-07-15" and "pagecontent/2018-07-15" with
  #   "pagecontent/2019-07-15" in each listed file.
  local filegrp=$1
  local file

  # Read the listing line by line rather than word-splitting it, so paths
  # containing spaces or glob characters are passed to sed intact.
  # NOTE(review): assumes `ocrd workspace find` prints one path per line.
  while IFS= read -r file; do
    [[ -n "$file" ]] || continue
    # Both version rewrites in a single in-place pass over the file.
    sed -i \
      -e 's#pagecontent/2017-07-15#pagecontent/2019-07-15#g' \
      -e 's#pagecontent/2018-07-15#pagecontent/2019-07-15#g' \
      -- "$file"
  done < <(ocrd workspace find -G "$filegrp")
}
page_validate_xml() {
# Validate all PAGE XML against the XML schema
@ -133,9 +118,9 @@ page_validate_xml OCR-D-SEG-LINE
# Workflow steps around OCR. All functions called here except page_fix_xml are
# defined outside this excerpt, and this excerpt comes from a diff hunk, so
# removed and added lines may be interleaved.
# NOTE(review): do_validate presumably validates the workspace — confirm.
do_validate
do_ocr
# Check the PAGE XML in the OCR output file group right after OCR has run.
page_validate_xml OCR-D-OCR-TESS
do_validate
# Rewrite the PAGE XML namespace version in the OCR output files.
page_fix_xml OCR-D-OCR-TESS # XXX is it necessary anymore?
# NOTE(review): presumably repairs image references in the OCR output — confirm.
page_fix_image_references OCR-D-OCR-TESS

Loading…
Cancel
Save