mirror of
				https://github.com/qurator-spk/ocrd-galley.git
				synced 2025-10-31 11:14:12 +01:00 
			
		
		
		
	🎨 Use long command lines again
This commit is contained in:
		
							parent
							
								
									6b83d5ae1e
								
							
						
					
					
						commit
						848dd143fd
					
				
					 1 changed file with 17 additions and 21 deletions
				
			
		|  | @ -10,11 +10,11 @@ fi | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| do_validate() { | do_validate() { | ||||||
|   validate_options=" |   validate_options=' | ||||||
|     --skip dimension |     --skip dimension | ||||||
|     --skip pixel_density |     --skip pixel_density | ||||||
|     --page-strictness lax |     --page-strictness lax | ||||||
|     --page-coordinate-consistency off" |     --page-coordinate-consistency off' | ||||||
|   ocrd workspace validate $validate_options |   ocrd workspace validate $validate_options | ||||||
|   # XXX ocrd-tesserocr INCONSISTENCY in TextRegion → use "--page-strictness lax" for now |   # XXX ocrd-tesserocr INCONSISTENCY in TextRegion → use "--page-strictness lax" for now | ||||||
| } | } | ||||||
|  | @ -22,12 +22,12 @@ do_validate() { | ||||||
| do_binarization() { | do_binarization() { | ||||||
|   # Binarize the images |   # Binarize the images | ||||||
| 
 | 
 | ||||||
|   ocrd_olena_binarize_parameters='{"impl": "sauvola-ms-split"}' |   ocrd_olena_binarize_parameters='{ | ||||||
|  |     "impl": "sauvola-ms-split" | ||||||
|  |   }' | ||||||
|   ocrd workspace remove-group -rf OCR-D-IMG-BINPAGE |   ocrd workspace remove-group -rf OCR-D-IMG-BINPAGE | ||||||
|   ocrd workspace remove-group -rf OCR-D-IMG-BIN |   ocrd workspace remove-group -rf OCR-D-IMG-BIN | ||||||
|   ocrd-olena-binarize \ |   ocrd-olena-binarize -I OCR-D-IMG -O OCR-D-IMG-BINPAGE -p "$ocrd_olena_binarize_parameters" | ||||||
|     -I OCR-D-IMG -O OCR-D-IMG-BINPAGE \ |  | ||||||
|     -p "$ocrd_olena_binarize_parameters" |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| do_linesegmentation_tesserocr() { | do_linesegmentation_tesserocr() { | ||||||
|  | @ -35,31 +35,29 @@ do_linesegmentation_tesserocr() { | ||||||
| 
 | 
 | ||||||
|   ocrd workspace remove-group -rf OCR-D-SEG-REGION |   ocrd workspace remove-group -rf OCR-D-SEG-REGION | ||||||
|   ocrd workspace remove-group -rf OCR-D-SEG-LINE |   ocrd workspace remove-group -rf OCR-D-SEG-LINE | ||||||
|   ocrd-tesserocr-segment-region \ |   ocrd-tesserocr-segment-region -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-REGION | ||||||
|     -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-REGION |   ocrd-tesserocr-segment-line -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE | ||||||
|   ocrd-tesserocr-segment-line \ |  | ||||||
|     -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| do_linesegmentation_sbb() { | do_linesegmentation_sbb() { | ||||||
|   # Segment the lines in the images |   # Segment the lines in the images | ||||||
| 
 | 
 | ||||||
|   ocrd_sbb_textline_detector_parameters='{"model": "/var/lib/textline_detection"}' |   ocrd_sbb_textline_detector_parameters='{ | ||||||
|  |     "model": "/var/lib/textline_detection" | ||||||
|  |   }' | ||||||
|   ocrd workspace remove-group -rf OCR-D-SEG-REGION |   ocrd workspace remove-group -rf OCR-D-SEG-REGION | ||||||
|   ocrd workspace remove-group -rf OCR-D-SEG-LINE |   ocrd workspace remove-group -rf OCR-D-SEG-LINE | ||||||
|   ocrd-sbb-textline-detector \ |   ocrd-sbb-textline-detector -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-LINE -p "$ocrd_sbb_textline_detector_parameters" | ||||||
|     -I OCR-D-IMG-BINPAGE -O OCR-D-SEG-LINE \ |  | ||||||
|     -p "$ocrd_sbb_textline_detector_parameters" |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| do_ocr() { | do_ocr() { | ||||||
|   # Perform OCR on the segmented lines |   # Perform OCR on the segmented lines | ||||||
| 
 | 
 | ||||||
|   ocrd_tesserocr_recognize_parameters='{ "model": "GT4HistOCR_2000000" }' |   ocrd_tesserocr_recognize_parameters='{ | ||||||
|  |     "model": "GT4HistOCR_2000000" | ||||||
|  |   }' | ||||||
|   ocrd workspace remove-group -rf OCR-D-OCR-TESS |   ocrd workspace remove-group -rf OCR-D-OCR-TESS | ||||||
|   ocrd-tesserocr-recognize \ |   ocrd-tesserocr-recognize -I OCR-D-SEG-LINE -O OCR-D-OCR-TESS -p "$ocrd_tesserocr_recognize_parameters" | ||||||
|     -I OCR-D-SEG-LINE -O OCR-D-OCR-TESS \ |  | ||||||
|     -p "$ocrd_tesserocr_recognize_parameters" |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| do_ocr_calamari() { | do_ocr_calamari() { | ||||||
|  | @ -68,9 +66,7 @@ do_ocr_calamari() { | ||||||
|     "textequiv_level": "line" |     "textequiv_level": "line" | ||||||
|   }' |   }' | ||||||
|   ocrd workspace remove-group -rf OCR-D-OCR-CALAMARI |   ocrd workspace remove-group -rf OCR-D-OCR-CALAMARI | ||||||
|   ocrd-calamari-recognize \ |   ocrd-calamari-recognize -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI -p "$ocrd_calamari_recognize_parameters" | ||||||
|     -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI \ |  | ||||||
|     -p "$ocrd_calamari_recognize_parameters" |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| page_validate_xml() { | page_validate_xml() { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue