mirror of
				https://github.com/qurator-spk/ocrd-galley.git
				synced 2025-10-31 03:04:12 +01:00 
			
		
		
		
	✨ Update to sbb_textline_detector with the fixed AlternativeImage support (= merged PAGE results)
This commit is contained in:
		
							parent
							
								
									de47a3e5b1
								
							
						
					
					
						commit
						d166077a55
					
				
					 3 changed files with 8 additions and 26 deletions
				
			
		|  | @ -35,9 +35,10 @@ do_validate() { | |||
| do_binarization() { | ||||
|   # Binarize the images | ||||
| 
 | ||||
|   remove_filegrp OCR-D-IMG-BINPAGE mets.xml | ||||
|   remove_filegrp OCR-D-IMG-BIN mets.xml | ||||
|   ocrd-olena-binarize -l $LOG_LEVEL \ | ||||
|     -m mets.xml -I OCR-D-IMG -O OCR-D-IMG-BIN \ | ||||
|     -m mets.xml -I OCR-D-IMG -O OCR-D-IMG-BINPAGE \ | ||||
|     -p <(echo '{"impl": "sauvola-ms-split"}') | ||||
| } | ||||
| 
 | ||||
|  | @ -66,11 +67,11 @@ do_linesegmentation_tesserocr() { | |||
|   remove_filegrp OCR-D-SEG-REGION mets.xml | ||||
|   remove_filegrp OCR-D-SEG-LINE mets.xml | ||||
|   #ocrd-ocropy-segment -l $LOG_LEVEL \ | ||||
|   #  -m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE | ||||
|   #  -m mets.xml -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-LINE | ||||
|   # XXX ocrd-ocropy-segment throws an exception for buerger_gedichte_1778.ocrd | ||||
| 
 | ||||
|   ocrd-tesserocr-segment-region -l $LOG_LEVEL \ | ||||
|     -m mets.xml -I OCR-D-IMG-BIN -O OCR-D-SEG-REGION | ||||
|     -m mets.xml -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-REGION | ||||
|   ocrd-tesserocr-segment-line -l $LOG_LEVEL \ | ||||
|     -m mets.xml -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE | ||||
| 
 | ||||
|  | @ -79,11 +80,12 @@ do_linesegmentation_tesserocr() { | |||
| 
 | ||||
| do_linesegmentation_sbb() { | ||||
|   # Segment the lines in the images | ||||
|   # TODO: Check that this works with the RGB images | ||||
| 
 | ||||
|   remove_filegrp OCR-D-SEG-REGION mets.xml | ||||
|   remove_filegrp OCR-D-SEG-LINE mets.xml | ||||
|   ocrd_sbb_textline_detector -l $LOG_LEVEL \ | ||||
|     -m mets.xml -I OCR-D-IMG -O OCR-D-SEG-LINE \ | ||||
|     -m mets.xml -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-LINE \ | ||||
|     -p '{"model": "/var/lib/textline_detection"}' | ||||
| } | ||||
| 
 | ||||
|  | @ -120,25 +122,6 @@ page_validate_xml() { | |||
|   done | ||||
| } | ||||
| 
 | ||||
| page_fix_image_references_to_bin() { | ||||
|   # Make image references point to the binarized images | ||||
|   # XXX This is a hack, it is probably better to use alternative images in ocrd_calamari | ||||
| 
 | ||||
|   filegrp=$1 | ||||
| 
 | ||||
|   local file | ||||
|   for file in `ocrd workspace find -G $filegrp`; do | ||||
|     # Arrays with filenames to the images | ||||
|     imgs=(`ocrd workspace find -G OCR-D-IMG`) | ||||
|     imgs_bin=(`ocrd workspace find -G OCR-D-IMG-BIN -m image/png`) | ||||
| 
 | ||||
|     # Change all image references to point to the corresponding binarized image | ||||
|     for i in ${!imgs[@]}; do | ||||
|       sed -i "s!imageFilename=.${imgs[$i]}.!imageFilename=\"${imgs_bin[$i]}\"!g" $file | ||||
|     done | ||||
|   done | ||||
| } | ||||
| 
 | ||||
| page_downgrade_to_2018() { | ||||
|   # Not used anymore, but kept if needed in the future | ||||
|   filegrp=$1 | ||||
|  | @ -170,7 +153,6 @@ do_validate | |||
| 
 | ||||
| 
 | ||||
| do_linesegmentation_sbb | ||||
| page_fix_image_references_to_bin OCR-D-SEG-LINE | ||||
| page_upgrade_to_2019             OCR-D-SEG-LINE | ||||
| page_validate_xml                OCR-D-SEG-REGION | ||||
| page_validate_xml                OCR-D-SEG-LINE | ||||
|  |  | |||
|  | @ -1,6 +1,6 @@ | |||
| tensorflow-gpu < 2.0  # Needed for sbb_textline_detector | ||||
| 
 | ||||
| ocrd >= 1.0.0 | ||||
| ocrd >= 2.0.0 | ||||
| 
 | ||||
| https://github.com/mikegerber/ocrd_typegroups_classifier/archive/fix/pass-down-page-id.tar.gz  # XXX git+https://github.com/seuretm/ocrd_typegroups_classifier.git | ||||
| 
 | ||||
|  | @ -11,6 +11,6 @@ ocrd_tesserocr | |||
| https://github.com/mikegerber/ocrd_calamari/archive/6949876.tar.gz | ||||
| 
 | ||||
| 
 | ||||
| vendor/sbb_textline_detector-d905c0b.tar | ||||
| vendor/sbb_textline_detector-10bbda9.tar | ||||
| 
 | ||||
| https://github.com/qurator-spk/dinglehopper/archive/c305539.tar.gz | ||||
|  |  | |||
										
											Binary file not shown.
										
									
								
							
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue