Update to sbb_textline_detector with the fixed AlternativeImage support (= merged PAGE results)

pull/27/head
Gerber, Mike 5 years ago
parent de47a3e5b1
commit d166077a55

@ -35,9 +35,10 @@ do_validate() {
do_binarization() {
  # Binarize the images.
  #
  # Clears the OCR-D-IMG-BINPAGE and OCR-D-IMG-BIN file groups from mets.xml,
  # then runs ocrd-olena-binarize on OCR-D-IMG with OCR-D-IMG-BINPAGE as the
  # output file group.
  #
  # Globals: LOG_LEVEL (read) - passed to the processor via -l
  #
  # The -m/-I/-O options are given exactly once; passing them a second time
  # with a different output group left the effective target ambiguous.
  # NOTE(review): OCR-D-IMG-BIN is still cleared here; presumably the
  # binarizer keeps writing the image files there - confirm.
  remove_filegrp OCR-D-IMG-BINPAGE mets.xml
  remove_filegrp OCR-D-IMG-BIN mets.xml
  # The parameters are handed over as a file via process substitution.
  ocrd-olena-binarize -l "$LOG_LEVEL" \
    -m mets.xml -I OCR-D-IMG -O OCR-D-IMG-BINPAGE \
    -p <(echo '{"impl": "sauvola-ms-split"}')
}
@ -66,11 +67,11 @@ do_linesegmentation_tesserocr() {
remove_filegrp OCR-D-SEG-REGION mets.xml
remove_filegrp OCR-D-SEG-LINE mets.xml
#ocrd-ocropy-segment -l $LOG_LEVEL \
# -m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE
# -m mets.xml -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-LINE
# XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd
ocrd-tesserocr-segment-region -l $LOG_LEVEL \
-m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-REGION
-m mets.xml -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-REGION
ocrd-tesserocr-segment-line -l $LOG_LEVEL \
-m mets.xml -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
@ -79,11 +80,12 @@ do_linesegmentation_tesserocr() {
do_linesegmentation_sbb() {
  # Segment the lines in the images with ocrd_sbb_textline_detector.
  #
  # Clears the OCR-D-SEG-REGION and OCR-D-SEG-LINE file groups from mets.xml,
  # then runs the detector on OCR-D-IMG-BINPAGE, writing OCR-D-SEG-LINE.
  #
  # Globals: LOG_LEVEL (read) - passed to the processor via -l
  #
  # The -m/-I/-O options are given exactly once; passing them a second time
  # with a different input group left the effective input ambiguous.
  # TODO: Check that this works with the RGB images
  remove_filegrp OCR-D-SEG-REGION mets.xml
  remove_filegrp OCR-D-SEG-LINE mets.xml
  ocrd_sbb_textline_detector -l "$LOG_LEVEL" \
    -m mets.xml -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-LINE \
    -p '{"model": "/var/lib/textline_detection"}'
}
@ -120,25 +122,6 @@ page_validate_xml() {
done
}
page_fix_image_references_to_bin() {
  # Make image references point to the binarized images.
  # XXX This is a hack, it is probably better to use alternative images in ocrd_calamari
  #
  # For every file in the given file group, each imageFilename attribute that
  # names the i-th OCR-D-IMG file is rewritten (in place, via sed -i) to name
  # the i-th image/png file of OCR-D-IMG-BIN.
  #
  # Arguments: $1 - file group whose files are rewritten
  # Returns:   0 on success; non-zero if a workspace lookup or sed fails, or
  #            if the two image lists differ in length (nothing is modified
  #            in that case)
  local filegrp=$1
  local listing file i pattern replacement
  local -a files=() imgs=() imgs_bin=() sed_script=()

  # The lookups do not depend on the file being processed, so run them once.
  listing=$(ocrd workspace find -G "$filegrp") || return
  if [[ -n "$listing" ]]; then mapfile -t files <<<"$listing"; fi
  listing=$(ocrd workspace find -G OCR-D-IMG) || return
  if [[ -n "$listing" ]]; then mapfile -t imgs <<<"$listing"; fi
  listing=$(ocrd workspace find -G OCR-D-IMG-BIN -m image/png) || return
  if [[ -n "$listing" ]]; then mapfile -t imgs_bin <<<"$listing"; fi

  # Images are paired by index; a missing counterpart would otherwise be
  # written out as an empty imageFilename.
  if ((${#imgs[@]} != ${#imgs_bin[@]})); then
    printf 'page_fix_image_references_to_bin: %d images in OCR-D-IMG but %d in OCR-D-IMG-BIN\n' \
      "${#imgs[@]}" "${#imgs_bin[@]}" >&2
    return 1
  fi

  # Build one sed expression per image pair. File names are escaped so that
  # regex metacharacters (e.g. the "." before the extension) and the "!"
  # delimiter are matched / inserted literally.
  for i in "${!imgs[@]}"; do
    pattern=$(printf '%s' "${imgs[i]}" | sed 's/[][\.*^$!]/\\&/g')
    replacement=$(printf '%s' "${imgs_bin[i]}" | sed 's/[\&!]/\\&/g')
    # The "." on either side of the name matches the attribute's quote character.
    sed_script+=(-e "s!imageFilename=.${pattern}.!imageFilename=\"${replacement}\"!g")
  done

  # Nothing to substitute: leave the files untouched.
  ((${#sed_script[@]})) || return 0

  for file in "${files[@]}"; do
    sed -i "${sed_script[@]}" -- "$file" || return
  done
}
page_downgrade_to_2018() {
# Not used anymore, but kept if needed in the future
filegrp=$1
@ -170,7 +153,6 @@ do_validate
# Line segmentation stage: run the SBB textline detector
do_linesegmentation_sbb
# Rewrite the image references in the OCR-D-SEG-LINE files to the binarized images
page_fix_image_references_to_bin OCR-D-SEG-LINE
# NOTE(review): presumably converts the OCR-D-SEG-LINE PAGE files to the 2019 schema - confirm
page_upgrade_to_2019 OCR-D-SEG-LINE
# NOTE(review): presumably validates the PAGE XML of both segmentation file groups - confirm
page_validate_xml OCR-D-SEG-REGION
page_validate_xml OCR-D-SEG-LINE

@ -1,6 +1,6 @@
tensorflow-gpu < 2.0 # Needed for sbb_textline_detector
ocrd >= 1.0.0
ocrd >= 2.0.0
https://github.com/mikegerber/ocrd_typegroups_classifier/archive/fix/pass-down-page-id.tar.gz # XXX git+https://github.com/seuretm/ocrd_typegroups_classifier.git
@ -11,6 +11,6 @@ ocrd_tesserocr
https://github.com/mikegerber/ocrd_calamari/archive/6949876.tar.gz
vendor/sbb_textline_detector-d905c0b.tar
vendor/sbb_textline_detector-10bbda9.tar
https://github.com/qurator-spk/dinglehopper/archive/c305539.tar.gz

Loading…
Cancel
Save