mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-06-09 06:39:53 +02:00
Binarize images before segmenting
This commit is contained in:
parent
5e1ece4877
commit
8d66469621
1 changed files with 10 additions and 3 deletions
|
@ -20,6 +20,12 @@ remove_filegrp() {
|
|||
# XXX This should also delete the files (after checking that they are indeed inside the workspace) and the directory
|
||||
}
|
||||
|
||||
# Binarize the images in file group OCR-D-IMG, writing the result to file
# group OCR-D-IMG-BIN of mets.xml.
#
# Calls remove_filegrp for OCR-D-IMG-BIN on mets.xml first (presumably so a
# re-run starts clean -- confirm against remove_filegrp), then runs
# ocrd-kraken-binarize with the log level taken from $LOG_LEVEL.
do_binarization() {
  remove_filegrp OCR-D-IMG-BIN mets.xml
  # LOG_LEVEL is quoted so that an empty or whitespace-containing value
  # cannot shift the following options into the -l argument slot.
  ocrd-kraken-binarize -l "$LOG_LEVEL" \
    -m mets.xml -I OCR-D-IMG -O OCR-D-IMG-BIN
}
|
||||
|
||||
do_fontident() {
|
||||
ocrd_typegroups_classifier_parameters='
|
||||
{
|
||||
|
@ -43,12 +49,12 @@ do_linesegmentation() {
|
|||
remove_filegrp OCR-D-SEG-REGION mets.xml
|
||||
remove_filegrp OCR-D-SEG-LINE mets.xml
|
||||
#ocrd-ocropy-segment -l $LOG_LEVEL \
|
||||
# -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE
|
||||
# -m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE
|
||||
# XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd
|
||||
#ocrd workspace validate mets.xml
|
||||
|
||||
ocrd-tesserocr-segment-region -l $LOG_LEVEL \
|
||||
-m mets.xml -I OCR-D-IMG -O OCR-D-SEG-REGION
|
||||
-m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-REGION
|
||||
#ocrd workspace validate mets.xml
|
||||
ocrd-tesserocr-segment-line -l $LOG_LEVEL \
|
||||
-m mets.xml -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
|
||||
|
@ -103,8 +109,9 @@ page_fix_image_references() {
|
|||
}
|
||||
|
||||
|
||||
# TODO Binarization
|
||||
do_fontident
|
||||
|
||||
do_binarization
|
||||
do_linesegmentation
|
||||
do_ocr
|
||||
page_fix_xml OCR-D-OCR-TESS
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue