|
|
|
@ -40,7 +40,7 @@ do_binarization() {
|
|
|
|
|
remove_filegrp OCR-D-IMG-BIN mets.xml
|
|
|
|
|
ocrd-olena-binarize -l $LOG_LEVEL \
|
|
|
|
|
-m mets.xml -I OCR-D-IMG -O OCR-D-IMG-BINPAGE \
|
|
|
|
|
-p <(echo '{"impl": "sauvola-ms-split"}')
|
|
|
|
|
-p '{"impl": "sauvola-ms-split"}'
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
do_fontident() {
|
|
|
|
@ -56,7 +56,7 @@ do_fontident() {
|
|
|
|
|
remove_filegrp OCR-D-OCR-FONTIDENT mets.xml
|
|
|
|
|
ocrd-typegroups-classifier -l $LOG_LEVEL \
|
|
|
|
|
-m mets.xml -I OCR-D-IMG -O OCR-D-OCR-FONTIDENT \
|
|
|
|
|
-p <(echo $ocrd_typegroups_classifier_parameters)
|
|
|
|
|
-p "$ocrd_typegroups_classifier_parameters"
|
|
|
|
|
# XXX Check if ocrd-typegroups-classifier uses the whole image
|
|
|
|
|
# XXX does DEFAULT have any meaning? /buerger_gedichte_1778.ocrd does not have
|
|
|
|
|
# any DEFAULT, yet -I DEFAULT seems to work for ocrd-typegroups-classifier
|
|
|
|
@ -97,7 +97,7 @@ do_ocr() {
|
|
|
|
|
remove_filegrp OCR-D-OCR-TESS mets.xml
|
|
|
|
|
ocrd-tesserocr-recognize -l $LOG_LEVEL \
|
|
|
|
|
-m mets.xml -I OCR-D-SEG-LINE -O OCR-D-OCR-TESS \
|
|
|
|
|
-p <(echo $ocrd_tesserocr_recognize_parameters)
|
|
|
|
|
-p "$ocrd_tesserocr_recognize_parameters"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
do_ocr_calamari() {
|
|
|
|
@ -105,7 +105,7 @@ do_ocr_calamari() {
|
|
|
|
|
remove_filegrp OCR-D-OCR-CALAMARI mets.xml
|
|
|
|
|
ocrd-calamari-recognize -l $LOG_LEVEL \
|
|
|
|
|
-m mets.xml -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI \
|
|
|
|
|
-p <(echo $ocrd_calamari_recognize_parameters)
|
|
|
|
|
-p "$ocrd_calamari_recognize_parameters"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
page_validate_xml() {
|
|
|
|
|