mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-07-05 19:19:53 +02:00
Binarize images before segmenting
This commit is contained in:
parent
5e1ece4877
commit
8d66469621
1 changed file with 10 additions and 3 deletions
|
@ -20,6 +20,12 @@ remove_filegrp() {
|
||||||
# XXX This should also delete the files (after checking if they are indeed inside the workspace) and the directory
|
# XXX This should also delete the files (after checking if they are indeed inside the workspace) and the directory
|
||||||
}
|
}
|
||||||
|
|
||||||
|
do_binarization() {
|
||||||
|
remove_filegrp OCR-D-IMG-BIN mets.xml
|
||||||
|
ocrd-kraken-binarize -l $LOG_LEVEL \
|
||||||
|
-m mets.xml -I OCR-D-IMG -O OCR-D-IMG-BIN
|
||||||
|
}
|
||||||
|
|
||||||
do_fontident() {
|
do_fontident() {
|
||||||
ocrd_typegroups_classifier_parameters='
|
ocrd_typegroups_classifier_parameters='
|
||||||
{
|
{
|
||||||
|
@ -43,12 +49,12 @@ do_linesegmentation() {
|
||||||
remove_filegrp OCR-D-SEG-REGION mets.xml
|
remove_filegrp OCR-D-SEG-REGION mets.xml
|
||||||
remove_filegrp OCR-D-SEG-LINE mets.xml
|
remove_filegrp OCR-D-SEG-LINE mets.xml
|
||||||
#ocrd-ocropy-segment -l $LOG_LEVEL \
|
#ocrd-ocropy-segment -l $LOG_LEVEL \
|
||||||
# -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE
|
# -m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE
|
||||||
# XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd
|
# XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd
|
||||||
#ocrd workspace validate mets.xml
|
#ocrd workspace validate mets.xml
|
||||||
|
|
||||||
ocrd-tesserocr-segment-region -l $LOG_LEVEL \
|
ocrd-tesserocr-segment-region -l $LOG_LEVEL \
|
||||||
-m mets.xml -I OCR-D-IMG -O OCR-D-SEG-REGION
|
-m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-REGION
|
||||||
#ocrd workspace validate mets.xml
|
#ocrd workspace validate mets.xml
|
||||||
ocrd-tesserocr-segment-line -l $LOG_LEVEL \
|
ocrd-tesserocr-segment-line -l $LOG_LEVEL \
|
||||||
-m mets.xml -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
|
-m mets.xml -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
|
||||||
|
@ -103,8 +109,9 @@ page_fix_image_references() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# TODO Binarization
|
|
||||||
do_fontident
|
do_fontident
|
||||||
|
|
||||||
|
do_binarization
|
||||||
do_linesegmentation
|
do_linesegmentation
|
||||||
do_ocr
|
do_ocr
|
||||||
page_fix_xml OCR-D-OCR-TESS
|
page_fix_xml OCR-D-OCR-TESS
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue