mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-06-08 22:29:56 +02:00
Use ocrd-tesserocr-segment-*
This commit is contained in:
parent
e30f03699c
commit
5e1ece4877
1 changed file with 14 additions and 8 deletions
|
@@ -40,18 +40,24 @@ do_fontident() {
|
|||
}
|
||||
|
||||
do_linesegmentation() {
|
||||
remove_filegrp OCR-D-SEG-REGION mets.xml
|
||||
remove_filegrp OCR-D-SEG-LINE mets.xml
|
||||
ocrd-ocropy-segment -l $LOG_LEVEL \
|
||||
-m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE
|
||||
#ocrd-ocropy-segment -l $LOG_LEVEL \
|
||||
# -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE
|
||||
# XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd
|
||||
#ocrd workspace validate mets.xml
|
||||
|
||||
# XXX This leaves copies of the images at the top level of the workspace, because it "downloads" the "remote" files.
|
||||
# Clean it up.
|
||||
find . -maxdepth 1 -name "OCR-D-IMG*" -type f -exec rm -v {} \;
|
||||
ocrd-tesserocr-segment-region -l $LOG_LEVEL \
|
||||
-m mets.xml -I OCR-D-IMG -O OCR-D-SEG-REGION
|
||||
#ocrd workspace validate mets.xml
|
||||
ocrd-tesserocr-segment-line -l $LOG_LEVEL \
|
||||
-m mets.xml -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
|
||||
#ocrd workspace validate mets.xml
|
||||
# XXX compare ocrd-tesserocr-segment* vs tesseract native
|
||||
|
||||
# XXX ocrd-tesserocr-segment-line does not seem to produce any line segmentation
|
||||
# XXX mv {ocrd-ocropy-segment,-line}
|
||||
# XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd
|
||||
# XXX This leaves copies of the images at the top level of the workspace, because it "downloads" the "remote" files.
|
||||
# Clean it up. (Maybe only affects ocrd-ocropy-segment)
|
||||
find . -maxdepth 1 -name "OCR-D-IMG*" -type f -exec rm -v {} \;
|
||||
}
|
||||
|
||||
do_ocr() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue