mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-07-02 01:49:52 +02:00
Add a PAGE fix XML step
This commit is contained in:
parent
d98ce2d2d4
commit
b6c490e18b
1 changed file with 11 additions and 0 deletions
|
@ -54,6 +54,16 @@ do_ocr() {
|
||||||
#ocrd workspace validate mets.xml
|
#ocrd workspace validate mets.xml
|
||||||
}
|
}
|
||||||
|
|
||||||
|
page_fix_xml() {
|
||||||
|
# XXX core does not produce valid XML https://github.com/OCR-D/core/issues/242
|
||||||
|
filegrp=$1
|
||||||
|
|
||||||
|
local file
|
||||||
|
for file in `ocrd workspace find -G $filegrp`; do
|
||||||
|
sed -i 's#pagecontent/2017-07-15#pagecontent/2018-07-15#g' $file
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
page_validate_xml() {
|
page_validate_xml() {
|
||||||
filegrp=$1
|
filegrp=$1
|
||||||
|
|
||||||
|
@ -67,6 +77,7 @@ page_validate_xml() {
|
||||||
do_fontident
|
do_fontident
|
||||||
do_linesegmentation
|
do_linesegmentation
|
||||||
do_ocr
|
do_ocr
|
||||||
|
page_fix_xml OCR-D-OCR-TESS
|
||||||
page_validate_xml OCR-D-OCR-TESS # This also makes sure PAGE Viewer can open it
|
page_validate_xml OCR-D-OCR-TESS # This also makes sure PAGE Viewer can open it
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue