mirror of
				https://github.com/qurator-spk/ocrd-galley.git
				synced 2025-10-31 19:24:12 +01:00 
			
		
		
		
	📝 Document do_validate() options better
This commit is contained in:
		
							parent
							
								
									1252d8ccc3
								
							
						
					
					
						commit
						6ae85063c5
					
				
					 1 changed file with 15 additions and 1 deletion
				
			
		|  | @ -10,13 +10,27 @@ fi | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| do_validate() { | do_validate() { | ||||||
|  |   # Validate the workspace | ||||||
|  | 
 | ||||||
|  |   # Both ocrd_tesserocr + ocrd_calamari produce segment coordinates that are not strictly within their parent's | ||||||
|  |   # coordinates: | ||||||
|  |   # | ||||||
|  |   #     INCONSISTENCY in [...] coords [...] not within parent coords | ||||||
|  |   # | ||||||
|  |   # → --page-coordinate-consistency off | ||||||
|  |   # | ||||||
|  |   # ocrd_tesserocr sometimes produces segment text results that do not match the concatenated text the validator expects: | ||||||
|  |   # | ||||||
|  |   #     INCONSISTENCY in [...]: text results '[...]' != concatenated '[...]' | ||||||
|  |   # | ||||||
|  |   # → --page-strictness lax | ||||||
|  |   # | ||||||
|   validate_options=' |   validate_options=' | ||||||
|     --skip dimension |     --skip dimension | ||||||
|     --skip pixel_density |     --skip pixel_density | ||||||
|     --page-strictness lax |     --page-strictness lax | ||||||
|     --page-coordinate-consistency off' |     --page-coordinate-consistency off' | ||||||
|   ocrd workspace validate $validate_options |   ocrd workspace validate $validate_options | ||||||
|   # XXX ocrd-tesserocr INCONSISTENCY in TextRegion → use "--page-strictness lax" for now |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| do_binarization() { | do_binarization() { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue