✨ Use sbb_textline_detector to segment lines

2026-06-28 23:49:14 +02:00 · 2019-10-11 19:16:43 +02:00 · 2019-10-11 19:16:43 +02:00 · 6454d20998
commit 6454d20998
parent 735e9599d7
6 changed files with 49 additions and 11 deletions
--- a/7
+++ b/7
@ -17,6 +17,8 @@ RUN apt-get update && \
      cmake libgif-dev libjpeg-dev libpng-dev libtiff-dev zlib1g-dev \
 # For clstm on Ubuntu 19.04:
      swig libeigen3-dev libpng-dev libprotobuf-dev \
 # For cv2:
      libsm6 libxrender1 \
 # XML utils
      libxml2-utils \
      xmlstarlet \
@ -53,6 +55,11 @@ COPY data/tesseract-models/GT4HistOCR/GT4HistOCR_2000000.traineddata $TESSDATA_P
 RUN tesseract --list-langs
 # Copy over sbb_textline_detector
 COPY vendor vendor
 COPY data/textline_detection /var/lib/textline_detection
 COPY requirements.txt /tmp
 RUN pip3 install --no-cache-dir -r /tmp/requirements.txt
--- a/1
+++ b/1
@ -7,6 +7,7 @@ set -e
  git annex upgrade
  git annex get calamari-models/GT4HistOCR/*.ckpt*
  git annex get tesseract-models/GT4HistOCR/*.traineddata
  git annex get textline_detection/*.h5
 )
 docker build -t my_ocrd_workflow .
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit eb7412a1efbcba53567ec37237732e96e839dbe8
+Subproject commit bcc1aec082cb81c29668ffef3d04c51eaa866b5c
--- a/44
+++ b/44
@ -59,7 +59,7 @@ do_fontident() {
  #     any DEFAULT, yet -I DEFAULT seems to work for ocrd-typegroups-classifier
 }
-do_linesegmentation() {
+do_linesegmentation_tesserocr() {
  # Segment the lines in the binarized images
  remove_filegrp OCR-D-SEG-REGION mets.xml
@ -76,6 +76,16 @@ do_linesegmentation() {
  # XXX compare ocrd-tesserocr-segment* vs tesseract native
 }
 # Segment the text lines of the images with ocrd_sbb_textline_detector.
 #
 # Drops the OCR-D-SEG-REGION and OCR-D-SEG-LINE file groups from mets.xml
 # first, then runs the detector on OCR-D-IMG and writes to OCR-D-SEG-LINE.
 #
 # Globals:   LOG_LEVEL (read) - passed to the detector's -l option
 # Arguments: none
 do_linesegmentation_sbb() {
  # Segment the lines in the images
  remove_filegrp OCR-D-SEG-REGION mets.xml
  remove_filegrp OCR-D-SEG-LINE mets.xml
  # LOG_LEVEL is quoted so it always reaches the detector as exactly one
  # argument (no word splitting, no glob expansion).
  ocrd_sbb_textline_detector -l "$LOG_LEVEL" \
    -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE \
    -p '{"model": "/var/lib/textline_detection"}'
 }
 do_ocr() {
  # Perform OCR on the segmented lines
@ -123,16 +133,22 @@ page_fix_image_references() {
  done
 }
-page_workaround_remove_conf() {
+page_fix_image_references_to_bin() {
-  # XXX Work around https://github.com/OCR-D/core/issues/269
+  # Make image references point to the binarized images
  # XXX This is a hack, it is probably better to use alternative images in ocrd_calamari
  filegrp=$1
  local file
  for file in `ocrd workspace find -G $filegrp`; do
-    xmlstarlet ed --inplace \
+    # Arrays with filenames to the images
-    -N 'page=http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15' \
+    imgs=(`ocrd workspace find -G OCR-D-IMG`)
-    -d '//page:TextEquiv/@conf' $file
+    imgs_bin=(`ocrd workspace find -G OCR-D-IMG-BIN`)
    # Change all image references to point to the corresponding binarized image
    for i in ${!imgs[@]}; do
      sed -i "s!imageFilename=.${imgs[$i]}.!imageFilename=\"${imgs_bin[$i]}\"!g" $file
    done
  done
 }
@ -146,6 +162,14 @@ page_downgrade_to_2018() {
  done
 }
 # Rewrite the PAGE "pagecontent/<version>" references of every file in a
 # file group to version 2019-07-15, editing the files in place.
 #
 # Arguments: $1 - name of the file group passed to `ocrd workspace find -G`
 # Globals:   filegrp (written) - left non-local, as in the original code
 page_upgrade_to_2019() {
  filegrp=$1
  local file
  # Read the result line by line instead of word-splitting it, so paths with
  # spaces or glob characters are handed to sed intact.
  # NOTE(review): assumes `ocrd workspace find` prints one path per line.
  while IFS= read -r file || [[ -n "$file" ]]; do
    # Skip blank lines; the former word-splitting loop ignored them as well.
    [[ -n "$file" ]] || continue
    sed -i 's#pagecontent/[0-9-]*#pagecontent/2019-07-15#g' -- "$file"
  done < <(ocrd workspace find -G "$filegrp")
 }
 pip3 list
@ -158,9 +182,11 @@ do_binarization
 do_validate
-do_linesegmentation
+do_linesegmentation_sbb
-page_validate_xml           OCR-D-SEG-REGION
+page_fix_image_references_to_bin OCR-D-SEG-LINE
-page_validate_xml           OCR-D-SEG-LINE
+page_upgrade_to_2019             OCR-D-SEG-LINE
 page_validate_xml                OCR-D-SEG-REGION
 page_validate_xml                OCR-D-SEG-LINE
 do_validate
--- a/requirements.txt
+++ b/requirements.txt
@ -1,4 +1,6 @@
-Pillow==5.4.1  # See https://github.com/OCR-D/core/issues/325
+tensorflow-gpu < 2.0  # Needed for sbb_textline_detector
 Pillow==5.4.1         # See https://github.com/OCR-D/core/issues/325
 ocrd >= 1.0.0b19
 https://github.com/mikegerber/ocrd_typegroups_classifier/archive/fix/pass-down-page-id.tar.gz  # XXX git+https://github.com/seuretm/ocrd_typegroups_classifier.git
@ -9,4 +11,6 @@ ocrd_tesserocr
 https://github.com/mikegerber/ocrd_calamari/archive/3e8c1ac.tar.gz
 vendor/sbb_textline_detector-b1663f7.tar
 https://github.com/qurator-spk/dinglehopper/archive/c305539.tar.gz
--- a/vendor/sbb_textline_detector-b1663f7.tar
+++ b/vendor/sbb_textline_detector-b1663f7.tar
		`@ -1 +1 @@`
			`Subproject commit eb7412a1efbcba53567ec37237732e96e839dbe8`				`Subproject commit bcc1aec082cb81c29668ffef3d04c51eaa866b5c`