mirror of
				https://github.com/qurator-spk/ocrd-galley.git
				synced 2025-11-04 04:54:13 +01:00 
			
		
		
		
	🚧 Add my_ocrd_workflow-sbb for the SBB test workflow
	
		
			
	
		
	
	
		
	
		
			Some checks reported errors
		
		
	
	
		
			
				
	
				continuous-integration/drone/push Build was killed
				
			
		
		
	
	
				
					
				
			
		
			Some checks reported errors
		
		
	
	continuous-integration/drone/push Build was killed
				
			This commit is contained in:
		
							parent
							
								
									1606cc522c
								
							
						
					
					
						commit
						1c68865e0a
					
				
					 1 changed files with 88 additions and 0 deletions
				
			
		
							
								
								
									
										88
									
								
								my_ocrd_workflow-sbb
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										88
									
								
								my_ocrd_workflow-sbb
									
										
									
									
									
										Executable file
									
								
							| 
						 | 
					@ -0,0 +1,88 @@
 | 
				
			||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set -e  # Abort on error
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Configuration
 | 
				
			||||||
 | 
					export LOG_LEVEL=${LOG_LEVEL:-INFO}  # /etc/ocrd_logging.py uses this to set level for all OCR-D modules
 | 
				
			||||||
 | 
					export TEXTEQUIV_LEVEL=word
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Command line parameters
 | 
				
			||||||
 | 
					OPTS=`getopt -o I: --long input-file-grp:,skip-validation -- "$@"`
 | 
				
			||||||
 | 
					eval set -- "$OPTS"
 | 
				
			||||||
 | 
					INPUT_FILE_GRP=OCR-D-IMG
 | 
				
			||||||
 | 
					SKIP_VALIDATION=false
 | 
				
			||||||
 | 
					while true; do
 | 
				
			||||||
 | 
					  case "$1" in
 | 
				
			||||||
 | 
					    -I|--input-file-grp) INPUT_FILE_GRP=$2; shift 2;;
 | 
				
			||||||
 | 
					    --skip-validation) SKIP_VALIDATION=true; shift;;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    --) shift; break;;
 | 
				
			||||||
 | 
					    *) break;;
 | 
				
			||||||
 | 
					  esac
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Set up logging
 | 
				
			||||||
 | 
					if [ "$LOG_LEVEL" = "DEBUG" -o "$LOG_LEVEL" = "TRACE" ]; then
 | 
				
			||||||
 | 
					  set -x
 | 
				
			||||||
 | 
					fi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					do_validate() {
 | 
				
			||||||
 | 
					  # Validate the workspace
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Both ocrd_tesserocr + ocrd_calamari produce segment coordinates that are not strictly within their parent's
 | 
				
			||||||
 | 
					  # coordinates:
 | 
				
			||||||
 | 
					  #
 | 
				
			||||||
 | 
					  #     INCONSISTENCY in [...] coords [...] not within parent coords
 | 
				
			||||||
 | 
					  #
 | 
				
			||||||
 | 
					  # → --page-coordinate-consistency off
 | 
				
			||||||
 | 
					  #
 | 
				
			||||||
 | 
					  # ocrd_tesserocr sometimes produces segment text results that aren't concatenating as expected by the validator:
 | 
				
			||||||
 | 
					  #
 | 
				
			||||||
 | 
					  #     INCONSISTENCY in [...]: text results '[...]' != concatenated '[...]'
 | 
				
			||||||
 | 
					  #
 | 
				
			||||||
 | 
					  # → --page-strictness lax
 | 
				
			||||||
 | 
					  #
 | 
				
			||||||
 | 
					  validate_options='
 | 
				
			||||||
 | 
					    --skip dimension
 | 
				
			||||||
 | 
					    --skip pixel_density
 | 
				
			||||||
 | 
					    --page-strictness lax
 | 
				
			||||||
 | 
					    --page-coordinate-consistency off'
 | 
				
			||||||
 | 
					  if [ "$SKIP_VALIDATION" = false ]; then
 | 
				
			||||||
 | 
					    ocrd workspace validate $validate_options
 | 
				
			||||||
 | 
					  fi
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					main() {
 | 
				
			||||||
 | 
					  do_validate
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ocrd-sbb-binarize --overwrite -I $INPUT_FILE_GRP -O OCR-D-IMG-BIN -P model "/var/lib/sbb_binarization"
 | 
				
			||||||
 | 
					  do_validate
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ocrd-sbb-textline-detector --overwrite -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE -P model "/var/lib/textline_detection"
 | 
				
			||||||
 | 
					  do_validate
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ocrd-calamari-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI -P checkpoint "/var/lib/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/*.ckpt.json" -P textequiv_level "$TEXTEQUIV_LEVEL"
 | 
				
			||||||
 | 
					  do_validate
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for ocr_filegrp in OCR-D-OCR-CALAMARI OCR-D-OCR-TESS; do
 | 
				
			||||||
 | 
					    if ocrd workspace list-group | grep -q OCR-D-GT-PAGE; then
 | 
				
			||||||
 | 
					      ocrd-dinglehopper --overwrite -I OCR-D-GT-PAGE,$ocr_filegrp -O $ocr_filegrp-EVAL
 | 
				
			||||||
 | 
					    fi
 | 
				
			||||||
 | 
					    ocrd-fileformat-transform --overwrite -I $ocr_filegrp -O ${ocr_filegrp}-ALTO
 | 
				
			||||||
 | 
					  done
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if [ "$LOG_LEVEL" = "DEBUG" -o "$LOG_LEVEL" = "TRACE" ]; then
 | 
				
			||||||
 | 
					  pip list || true
 | 
				
			||||||
 | 
					fi
 | 
				
			||||||
 | 
					main
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# vim:tw=120:
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue