mirror of
				https://github.com/qurator-spk/dinglehopper.git
				synced 2025-10-26 23:14:13 +01:00 
			
		
		
		
	🐛 Add --plain-encoding option to dinglehopper-extract
This commit is contained in:
		
							parent
							
								
									a70260c10e
								
							
						
					
					
						commit
						14a4bc56d8
					
				
					 1 changed file with 9 additions and 2 deletions
				
			
		|  | @ -12,7 +12,12 @@ from .ocr_files import extract | |||
|     help="PAGE TextEquiv level to extract text from", | ||||
|     metavar="LEVEL", | ||||
| ) | ||||
| def main(input_file, textequiv_level): | ||||
| @click.option( | ||||
|     "--plain-encoding", | ||||
|     default="autodetect", | ||||
|     help='Encoding (e.g. "utf-8") of plain text files', | ||||
| ) | ||||
| def main(input_file, textequiv_level, plain_encoding): | ||||
|     """ | ||||
|     Extract the text of the given INPUT_FILE. | ||||
| 
 | ||||
|  | @ -23,7 +28,9 @@ def main(input_file, textequiv_level): | |||
|     use "--textequiv-level line" to extract from the level of TextLine tags. | ||||
|     """ | ||||
|     initLogging() | ||||
|     input_text = extract(input_file, textequiv_level=textequiv_level).text | ||||
|     input_text = extract( | ||||
|         input_file, textequiv_level=textequiv_level, plain_encoding=plain_encoding | ||||
|     ).text | ||||
|     print(input_text) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue