mirror of
				https://github.com/qurator-spk/page2tsv.git
				synced 2025-10-31 08:34:13 +01:00 
			
		
		
		
	Merge branch 'master' into ocrd-processors
# Conflicts: # setup.py
This commit is contained in:
		
						commit
						db25239075
					
				
					 3 changed files with 38 additions and 2 deletions
				
			
		
							
								
								
									
										37
									
								
								README.md
									
										
									
									
									
								
							
							
						
						
									
										37
									
								
								README.md
									
										
									
									
									
								
							|  | @ -1,7 +1,11 @@ | |||
| # TSV - Processing Tools | ||||
| 
 | ||||
| Create .tsv files that can be viewed and edited with [neat](https://github.com/qurator-spk/neat). | ||||
| 
 | ||||
| ## Installation: | ||||
| 
 | ||||
| Clone this project and the [SBB-utils](https://github.com/qurator-spk/sbb_utils) repository. | ||||
| 
 | ||||
| Set up a virtual environment: | ||||
| ``` | ||||
| virtualenv --python=python3.6 venv | ||||
|  | @ -19,7 +23,8 @@ pip install -U pip | |||
| 
 | ||||
| Install package together with its dependencies in development mode: | ||||
| ``` | ||||
| pip install -e ./ | ||||
| pip install -e sbb_utils | ||||
| pip install -e page2tsv | ||||
| ``` | ||||
| 
 | ||||
| ## PAGE-XML to TSV Transformation: | ||||
|  | @ -59,3 +64,33 @@ Create a URL-annotated TSV file from an existing TSV file: | |||
| ``` | ||||
| annotate-tsv enp_DE.tsv enp_DE-annotated.tsv | ||||
| ``` | ||||
| 
 | ||||
| ## Command-line interface: | ||||
| 
 | ||||
| ``` | ||||
| page2tsv [OPTIONS] PAGE_XML_FILE TSV_OUT_FILE | ||||
| 
 | ||||
| Options: | ||||
|   --purpose [NERD|OCR]      Purpose of output tsv file. | ||||
|                              | ||||
|                             NERD: NER/NED application/ground-truth creation. | ||||
|                              | ||||
|                             OCR: OCR application/ground-truth creation. | ||||
|                              | ||||
|                             default: NERD. | ||||
|   --image-url TEXT | ||||
|   --ner-rest-endpoint TEXT  REST endpoint of sbb_ner service. See | ||||
|                             https://github.com/qurator-spk/sbb_ner for | ||||
|                             details. Only applicable in case of NERD. | ||||
|   --ned-rest-endpoint TEXT  REST endpoint of sbb_ned service. See | ||||
|                             https://github.com/qurator-spk/sbb_ned for | ||||
|                             details. Only applicable in case of NERD. | ||||
|   --noproxy                 disable proxy. default: enabled. | ||||
|   --scale-factor FLOAT      default: 1.0 | ||||
|   --ned-threshold FLOAT | ||||
|   --min-confidence FLOAT | ||||
|   --max-confidence FLOAT | ||||
|   --ned-priority INTEGER | ||||
|   --help                    Show this message and exit. | ||||
| 
 | ||||
| ``` | ||||
|  | @ -1,4 +1,4 @@ | |||
| ocrd >= 2.23.2 | ||||
| pandas | ||||
| matplotlib | ||||
| qurator-sbb-tools | ||||
| qurator-sbb-utils | ||||
|  | @ -19,6 +19,7 @@ from .ocr import get_conf_color | |||
| from qurator.utils.ner import ner | ||||
| from qurator.utils.ned import ned | ||||
| 
 | ||||
| 
 | ||||
| @click.command() | ||||
| @click.argument('tsv-file', type=click.Path(exists=True), required=True, nargs=1) | ||||
| @click.argument('url-file', type=click.Path(exists=False), required=True, nargs=1) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue