mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-07-09 04:59:54 +02:00
Use ocrd-tesserocr-segment-*
This commit is contained in:
parent
e30f03699c
commit
5e1ece4877
1 changed file with 14 additions and 8 deletions
|
@@ -40,18 +40,24 @@ do_fontident() {
|
||||||
}
|
}
|
||||||
|
|
||||||
do_linesegmentation() {
|
do_linesegmentation() {
|
||||||
|
remove_filegrp OCR-D-SEG-REGION mets.xml
|
||||||
remove_filegrp OCR-D-SEG-LINE mets.xml
|
remove_filegrp OCR-D-SEG-LINE mets.xml
|
||||||
ocrd-ocropy-segment -l $LOG_LEVEL \
|
#ocrd-ocropy-segment -l $LOG_LEVEL \
|
||||||
-m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE
|
# -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE
|
||||||
|
# XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd
|
||||||
#ocrd workspace validate mets.xml
|
#ocrd workspace validate mets.xml
|
||||||
|
|
||||||
# XXX This leaves copies of the images at the top level of the workspace, because it "downloads" the "remote" files.
|
ocrd-tesserocr-segment-region -l $LOG_LEVEL \
|
||||||
# Clean it up.
|
-m mets.xml -I OCR-D-IMG -O OCR-D-SEG-REGION
|
||||||
find . -maxdepth 1 -name "OCR-D-IMG*" -type f -exec rm -v {} \;
|
#ocrd workspace validate mets.xml
|
||||||
|
ocrd-tesserocr-segment-line -l $LOG_LEVEL \
|
||||||
|
-m mets.xml -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
|
||||||
|
#ocrd workspace validate mets.xml
|
||||||
|
# XXX compare ocrd-tesserocr-segment* vs tesseract native
|
||||||
|
|
||||||
# XXX ocrd-tesserocr-segment-line does not seem to produce any line segmentation
|
# XXX This leaves copies of the images at the top level of the workspace, because it "downloads" the "remote" files.
|
||||||
# XXX mv {ocrd-ocropy-segment,-line}
|
# Clean it up. (Maybe only affects ocrd-ocropy-segment)
|
||||||
# XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd
|
find . -maxdepth 1 -name "OCR-D-IMG*" -type f -exec rm -v {} \;
|
||||||
}
|
}
|
||||||
|
|
||||||
do_ocr() {
|
do_ocr() {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue