Binarize images before segmenting

pull/27/head
Gerber, Mike 5 years ago
parent 5e1ece4877
commit 8d66469621

@ -20,6 +20,12 @@ remove_filegrp() {
# XXX This should also delete the files (after checking if they are indeed inside the workspace) and the directory
}
do_binarization() {
  # Run ocrd-kraken-binarize on the OCR-D-IMG file group of mets.xml,
  # writing its output to the OCR-D-IMG-BIN file group.
  #
  # Globals read:
  #   LOG_LEVEL - passed to the processor's -l option.

  # Remove the output file group before running the processor.
  # NOTE(review): presumably this lets the step be re-run without colliding
  # with entries from a previous run -- confirm against remove_filegrp.
  remove_filegrp OCR-D-IMG-BIN mets.xml

  # LOG_LEVEL is quoted so that an empty value or one containing whitespace
  # stays a single argument to -l; unquoted, an empty value would make -l
  # consume the following -m as its argument.
  ocrd-kraken-binarize -l "$LOG_LEVEL" \
    -m mets.xml -I OCR-D-IMG -O OCR-D-IMG-BIN
}
do_fontident() {
ocrd_typegroups_classifier_parameters='
{
@ -43,12 +49,12 @@ do_linesegmentation() {
remove_filegrp OCR-D-SEG-REGION mets.xml
remove_filegrp OCR-D-SEG-LINE mets.xml
#ocrd-ocropy-segment -l $LOG_LEVEL \
# -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE
# -m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE
# XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd
#ocrd workspace validate mets.xml
ocrd-tesserocr-segment-region -l $LOG_LEVEL \
-m mets.xml -I OCR-D-IMG -O OCR-D-SEG-REGION
-m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-REGION
#ocrd workspace validate mets.xml
ocrd-tesserocr-segment-line -l $LOG_LEVEL \
-m mets.xml -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
@ -103,8 +109,9 @@ page_fix_image_references() {
}
# TODO Binarization
do_fontident
do_binarization
do_linesegmentation
do_ocr
page_fix_xml OCR-D-OCR-TESS

Loading…
Cancel
Save