mirror of https://github.com/qurator-spk/ocrd-galley.git (synced 2025-10-31 03:04:12 +01:00)
			
		
		
		
	✨ Update to sbb_textline_detector with the fixed AlternativeImage support (= merged PAGE results)
This commit is contained in:
		
							parent de47a3e5b1
						commit d166077a55
					
				
					 3 changed files with 8 additions and 26 deletions
				
			
		|  | @ -35,9 +35,10 @@ do_validate() { | ||||||
| do_binarization() { | do_binarization() { | ||||||
|   # Binarize the images |   # Binarize the images | ||||||
| 
 | 
 | ||||||
|  |   remove_filegrp OCR-D-IMG-BINPAGE mets.xml | ||||||
|   remove_filegrp OCR-D-IMG-BIN mets.xml |   remove_filegrp OCR-D-IMG-BIN mets.xml | ||||||
|   ocrd-olena-binarize -l $LOG_LEVEL \ |   ocrd-olena-binarize -l $LOG_LEVEL \ | ||||||
|     -m mets.xml -I OCR-D-IMG -O OCR-D-IMG-BIN \ |     -m mets.xml -I OCR-D-IMG -O OCR-D-IMG-BINPAGE \ | ||||||
|     -p <(echo '{"impl": "sauvola-ms-split"}') |     -p <(echo '{"impl": "sauvola-ms-split"}') | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -66,11 +67,11 @@ do_linesegmentation_tesserocr() { | ||||||
|   remove_filegrp OCR-D-SEG-REGION mets.xml |   remove_filegrp OCR-D-SEG-REGION mets.xml | ||||||
|   remove_filegrp OCR-D-SEG-LINE mets.xml |   remove_filegrp OCR-D-SEG-LINE mets.xml | ||||||
|   #ocrd-ocropy-segment -l $LOG_LEVEL \ |   #ocrd-ocropy-segment -l $LOG_LEVEL \ | ||||||
|   #  -m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE |   #  -m mets.xml -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-LINE | ||||||
|   # XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd |   # XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd | ||||||
| 
 | 
 | ||||||
|   ocrd-tesserocr-segment-region -l $LOG_LEVEL \ |   ocrd-tesserocr-segment-region -l $LOG_LEVEL \ | ||||||
|     -m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-REGION |     -m mets.xml -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-REGION | ||||||
|   ocrd-tesserocr-segment-line -l $LOG_LEVEL \ |   ocrd-tesserocr-segment-line -l $LOG_LEVEL \ | ||||||
|     -m mets.xml -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE |     -m mets.xml -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE | ||||||
| 
 | 
 | ||||||
|  | @ -79,11 +80,12 @@ do_linesegmentation_tesserocr() { | ||||||
| 
 | 
 | ||||||
| do_linesegmentation_sbb() { | do_linesegmentation_sbb() { | ||||||
|   # Segment the lines in the images |   # Segment the lines in the images | ||||||
|  |   # TODO: Check that this works with the RGB images | ||||||
| 
 | 
 | ||||||
|   remove_filegrp OCR-D-SEG-REGION mets.xml |   remove_filegrp OCR-D-SEG-REGION mets.xml | ||||||
|   remove_filegrp OCR-D-SEG-LINE mets.xml |   remove_filegrp OCR-D-SEG-LINE mets.xml | ||||||
|   ocrd_sbb_textline_detector -l $LOG_LEVEL \ |   ocrd_sbb_textline_detector -l $LOG_LEVEL \ | ||||||
|     -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE \ |     -m mets.xml -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-LINE \ | ||||||
|     -p '{"model": "/var/lib/textline_detection"}' |     -p '{"model": "/var/lib/textline_detection"}' | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -120,25 +122,6 @@ page_validate_xml() { | ||||||
|   done |   done | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| page_fix_image_references_to_bin() { |  | ||||||
|   # Make image references point to the binarized images |  | ||||||
|   # XXX This is a hack, it is probably better to use alternative images in ocrd_calamari |  | ||||||
| 
 |  | ||||||
|   filegrp=$1 |  | ||||||
| 
 |  | ||||||
|   local file |  | ||||||
|   for file in `ocrd workspace find -G $filegrp`; do |  | ||||||
|     # Arrays with filenames to the images |  | ||||||
|     imgs=(`ocrd workspace find -G OCR-D-IMG`) |  | ||||||
|     imgs_bin=(`ocrd workspace find -G OCR-D-IMG-BIN -m image/png`) |  | ||||||
| 
 |  | ||||||
|     # Change all image references to point to the corresponding binarized image |  | ||||||
|     for i in ${!imgs[@]}; do |  | ||||||
|       sed -i "s!imageFilename=.${imgs[$i]}.!imageFilename=\"${imgs_bin[$i]}\"!g" $file |  | ||||||
|     done |  | ||||||
|   done |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| page_downgrade_to_2018() { | page_downgrade_to_2018() { | ||||||
|   # Not used anymore, but kept if needed in the future |   # Not used anymore, but kept if needed in the future | ||||||
|   filegrp=$1 |   filegrp=$1 | ||||||
|  | @ -170,7 +153,6 @@ do_validate | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| do_linesegmentation_sbb | do_linesegmentation_sbb | ||||||
| page_fix_image_references_to_bin OCR-D-SEG-LINE |  | ||||||
| page_upgrade_to_2019             OCR-D-SEG-LINE | page_upgrade_to_2019             OCR-D-SEG-LINE | ||||||
| page_validate_xml                OCR-D-SEG-REGION | page_validate_xml                OCR-D-SEG-REGION | ||||||
| page_validate_xml                OCR-D-SEG-LINE | page_validate_xml                OCR-D-SEG-LINE | ||||||
|  |  | ||||||
|  | @ -1,6 +1,6 @@ | ||||||
| tensorflow-gpu < 2.0  # Needed for sbb_text_linedetector | tensorflow-gpu < 2.0  # Needed for sbb_text_linedetector | ||||||
| 
 | 
 | ||||||
| ocrd >= 1.0.0 | ocrd >= 2.0.0 | ||||||
| 
 | 
 | ||||||
| https://github.com/mikegerber/ocrd_typegroups_classifier/archive/fix/pass-down-page-id.tar.gz  # XXX git+https://github.com/seuretm/ocrd_typegroups_classifier.git | https://github.com/mikegerber/ocrd_typegroups_classifier/archive/fix/pass-down-page-id.tar.gz  # XXX git+https://github.com/seuretm/ocrd_typegroups_classifier.git | ||||||
| 
 | 
 | ||||||
|  | @ -11,6 +11,6 @@ ocrd_tesserocr | ||||||
| https://github.com/mikegerber/ocrd_calamari/archive/6949876.tar.gz | https://github.com/mikegerber/ocrd_calamari/archive/6949876.tar.gz | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| vendor/sbb_textline_detector-d905c0b.tar | vendor/sbb_textline_detector-10bbda9.tar | ||||||
| 
 | 
 | ||||||
| https://github.com/qurator-spk/dinglehopper/archive/c305539.tar.gz | https://github.com/qurator-spk/dinglehopper/archive/c305539.tar.gz | ||||||
|  |  | ||||||
										
											Binary file not shown.
										
									
								
							
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue