mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-07-27 21:59:52 +02:00
✨ Validate PAGE XML after OCR
This commit is contained in:
parent
0d7fd21446
commit
8b67866aac
1 changed file with 1 addition and 16 deletions
|
@ -81,21 +81,6 @@ do_ocr() {
|
||||||
-p <(echo $ocrd_tesserocr_recognize_parameters)
|
-p <(echo $ocrd_tesserocr_recognize_parameters)
|
||||||
}
|
}
|
||||||
|
|
||||||
page_fix_xml() {
|
|
||||||
# Fix the PAGE XML generated by OCR-D core
|
|
||||||
#
|
|
||||||
# XXX core does not produce valid XML (See https://github.com/OCR-D/core/issues/242), fix it by setting the correct
|
|
||||||
# PAGE XML version. This makes PAGE Viewer open the file.
|
|
||||||
|
|
||||||
filegrp=$1
|
|
||||||
|
|
||||||
local file
|
|
||||||
for file in `ocrd workspace find -G $filegrp`; do
|
|
||||||
sed -i 's#pagecontent/2017-07-15#pagecontent/2019-07-15#g' $file
|
|
||||||
sed -i 's#pagecontent/2018-07-15#pagecontent/2019-07-15#g' $file
|
|
||||||
done
|
|
||||||
}
|
|
||||||
|
|
||||||
page_validate_xml() {
|
page_validate_xml() {
|
||||||
# Validate all PAGE XML against the XML schema
|
# Validate all PAGE XML against the XML schema
|
||||||
|
|
||||||
|
@ -133,9 +118,9 @@ page_validate_xml OCR-D-SEG-LINE
|
||||||
do_validate
|
do_validate
|
||||||
|
|
||||||
do_ocr
|
do_ocr
|
||||||
|
page_validate_xml OCR-D-OCR-TESS
|
||||||
do_validate
|
do_validate
|
||||||
|
|
||||||
page_fix_xml OCR-D-OCR-TESS # XXX is it necessary anymore?
|
|
||||||
page_fix_image_references OCR-D-OCR-TESS
|
page_fix_image_references OCR-D-OCR-TESS
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue