mirror of
				https://github.com/qurator-spk/ocrd-galley.git
				synced 2025-10-31 19:24:12 +01:00 
			
		
		
		
	📝 Document do_validate() options better
This commit is contained in:
		
							parent
							
								
									1252d8ccc3
								
							
						
					
					
						commit
						6ae85063c5
					
				
					 1 changed file with 15 additions and 1 deletion
				
			
		|  | @ -10,13 +10,27 @@ fi | |||
| 
 | ||||
| 
 | ||||
| do_validate() { | ||||
|   # Validate the workspace by running "ocrd workspace validate" with the checks below skipped or relaxed | ||||
| 
 | ||||
|   # Both ocrd_tesserocr + ocrd_calamari produce segment coordinates that are not strictly within their parent's | ||||
|   # coordinates: | ||||
|   # | ||||
|   #     INCONSISTENCY in [...] coords [...] not within parent coords | ||||
|   # | ||||
|   # → --page-coordinate-consistency off | ||||
|   # | ||||
|   # ocrd_tesserocr sometimes produces segment text results that aren't concatenating as expected by the validator: | ||||
|   # | ||||
|   #     INCONSISTENCY in [...]: text results '[...]' != concatenated '[...]' | ||||
|   # | ||||
|   # → --page-strictness lax | ||||
|   # | ||||
|   validate_options=' | ||||
|     --skip dimension | ||||
|     --skip pixel_density | ||||
|     --page-strictness lax | ||||
|     --page-coordinate-consistency off' | ||||
|   ocrd workspace validate $validate_options | ||||
|   # XXX $validate_options is left unquoted above so that it word-splits into separate arguments; "--page-strictness lax" is a workaround (for now) for the ocrd-tesserocr INCONSISTENCY in TextRegion | ||||
| } | ||||
| 
 | ||||
| do_binarization() { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue