mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-07-27 21:59:52 +02:00
🚧 Work around problems with ocrd-tesserocr producing TextEquiv/@conf
This commit is contained in:
parent
8b67866aac
commit
44772f1923
1 changed file with 15 additions and 2 deletions
|
@ -106,8 +106,21 @@ page_fix_image_references() {
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Delete the conf attribute from every page:TextEquiv element in the PAGE XML
# files of one file group, editing each file in place.
#
# XXX Work around https://github.com/OCR-D/core/issues/269
#
# Arguments: $1 - name of the file group to process (passed to
#                 `ocrd workspace find -G`)
# Returns:   0 when every listed file was edited (or none were listed);
#            otherwise the exit status of the failing `ocrd` or `xmlstarlet`
#            call
#######################################
page_workaround_remove_conf() {
  local filegrp=$1
  local listing file

  # Capture the listing first so that a failing `ocrd` call is reported to the
  # caller instead of silently turning into "no files to process".
  listing=$(ocrd workspace find -G "$filegrp") || return

  # Read the listing line by line: each line is handed to xmlstarlet as exactly
  # one argument, without word splitting or glob expansion.
  while IFS= read -r file; do
    # Skip blank lines (an empty listing yields a single empty line).
    [ -n "$file" ] || continue
    xmlstarlet ed --inplace \
      -N 'page=http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15' \
      -d '//page:TextEquiv/@conf' "$file" || return
  done <<EOF
$listing
EOF
}
|
||||||
|
|
||||||
|
|
||||||
|
# XXX do_fontident
|
||||||
|
|
||||||
do_binarization
|
do_binarization
|
||||||
do_validate
|
do_validate
|
||||||
|
@ -119,9 +132,9 @@ do_validate
|
||||||
|
|
||||||
do_ocr
|
do_ocr
|
||||||
page_validate_xml OCR-D-OCR-TESS
|
page_validate_xml OCR-D-OCR-TESS
|
||||||
|
page_workaround_remove_conf OCR-D-OCR-TESS
|
||||||
do_validate
|
do_validate
|
||||||
|
|
||||||
page_fix_image_references OCR-D-OCR-TESS
|
page_fix_image_references OCR-D-OCR-TESS
|
||||||
|
|
||||||
|
|
||||||
# vim:tw=120:
|
# vim:tw=120:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue