Use ocrd-tesserocr-segment-*

pull/27/head
Gerber, Mike 6 years ago
parent e30f03699c
commit 5e1ece4877

@@ -40,18 +40,24 @@ do_fontident() {
}
#######################################
# Run the segmentation processors on the workspace described by mets.xml
# in the current directory.
#
# Steps, in order:
#   1. Call remove_filegrp for OCR-D-SEG-REGION and OCR-D-SEG-LINE.
#   2. ocrd-ocropy-segment:           OCR-D-IMG        -> OCR-D-SEG-LINE
#   3. ocrd-tesserocr-segment-region: OCR-D-IMG        -> OCR-D-SEG-REGION
#   4. ocrd-tesserocr-segment-line:   OCR-D-SEG-REGION -> OCR-D-SEG-LINE
#   5. Delete regular files named OCR-D-IMG* that sit directly in the
#      current directory.
#
# Globals:   LOG_LEVEL (read) - handed to every processor via -l.
# Arguments: none
#######################################
do_linesegmentation() {
  remove_filegrp OCR-D-SEG-REGION mets.xml
  remove_filegrp OCR-D-SEG-LINE mets.xml

  # LOG_LEVEL is quoted so that it always reaches the processor as exactly
  # one argument, whatever characters it contains.
  # XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd
  # NOTE(review): this call and ocrd-tesserocr-segment-line below both write
  # to OCR-D-SEG-LINE - confirm whether this call is still meant to be active.
  ocrd-ocropy-segment -l "$LOG_LEVEL" \
    -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE
  #ocrd workspace validate mets.xml

  ocrd-tesserocr-segment-region -l "$LOG_LEVEL" \
    -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-REGION
  #ocrd workspace validate mets.xml

  ocrd-tesserocr-segment-line -l "$LOG_LEVEL" \
    -m mets.xml -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
  #ocrd workspace validate mets.xml

  # XXX compare ocrd-tesserocr-segment* vs tesseract native

  # XXX This leaves copies of the images at the top level of the workspace,
  # because it "downloads" the "remote" files. Clean it up.
  # (Maybe only affects ocrd-ocropy-segment)
  find . -maxdepth 1 -name "OCR-D-IMG*" -type f -exec rm -v {} \;

  # XXX ocrd-tesserocr-segment-line does not seem to produce any line segmentation
  # XXX mv {ocrd-ocropy-segment,-line}
}
do_ocr() {

Loading…
Cancel
Save