mirror of
				https://github.com/qurator-spk/page2tsv.git
				synced 2025-10-31 08:34:13 +01:00 
			
		
		
		
	Merge branch 'master' into ocrd-processors
# Conflicts: # setup.py
This commit is contained in:
		
						commit
						db25239075
					
				
					 3 changed files with 38 additions and 2 deletions
				
			
		
							
								
								
									
										37
									
								
								README.md
									
										
									
									
									
								
							
							
						
						
									
										37
									
								
								README.md
									
										
									
									
									
								
							|  | @ -1,7 +1,11 @@ | ||||||
| # TSV - Processing Tools | # TSV - Processing Tools | ||||||
| 
 | 
 | ||||||
|  | Create .tsv files that can be viewed and edited with [neat](https://github.com/qurator-spk/neat). | ||||||
|  | 
 | ||||||
| ## Installation: | ## Installation: | ||||||
| 
 | 
 | ||||||
|  | Clone this project and the [SBB-utils](https://github.com/qurator-spk/sbb_utils) repository. | ||||||
|  | 
 | ||||||
| Set up a virtual environment: | Set up a virtual environment: | ||||||
| ``` | ``` | ||||||
| virtualenv --python=python3.6 venv | virtualenv --python=python3.6 venv | ||||||
|  | @ -19,7 +23,8 @@ pip install -U pip | ||||||
| 
 | 
 | ||||||
| Install package together with its dependencies in development mode: | Install package together with its dependencies in development mode: | ||||||
| ``` | ``` | ||||||
| pip install -e ./ | pip install -e sbb_utils | ||||||
|  | pip install -e page2tsv | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## PAGE-XML to TSV Transformation: | ## PAGE-XML to TSV Transformation: | ||||||
|  | @ -59,3 +64,33 @@ Create a URL-annotated TSV file from an existing TSV file: | ||||||
| ``` | ``` | ||||||
| annotate-tsv enp_DE.tsv enp_DE-annotated.tsv | annotate-tsv enp_DE.tsv enp_DE-annotated.tsv | ||||||
| ``` | ``` | ||||||
|  | 
 | ||||||
|  | ## Command-line interface: | ||||||
|  | 
 | ||||||
|  | ``` | ||||||
|  | page2tsv [OPTIONS] PAGE_XML_FILE TSV_OUT_FILE | ||||||
|  | 
 | ||||||
|  | Options: | ||||||
|  |   --purpose [NERD|OCR]      Purpose of output tsv file. | ||||||
|  |                              | ||||||
|  |                             NERD: NER/NED application/ground-truth creation. | ||||||
|  |                              | ||||||
|  |                             OCR: OCR application/ground-truth creation. | ||||||
|  |                              | ||||||
|  |                             default: NERD. | ||||||
|  |   --image-url TEXT | ||||||
|  |   --ner-rest-endpoint TEXT  REST endpoint of sbb_ner service. See | ||||||
|  |                             https://github.com/qurator-spk/sbb_ner for | ||||||
|  |                             details. Only applicable in case of NERD. | ||||||
|  |   --ned-rest-endpoint TEXT  REST endpoint of sbb_ned service. See | ||||||
|  |                             https://github.com/qurator-spk/sbb_ned for | ||||||
|  |                             details. Only applicable in case of NERD. | ||||||
|  |   --noproxy                 disable proxy. default: enabled. | ||||||
|  |   --scale-factor FLOAT      default: 1.0 | ||||||
|  |   --ned-threshold FLOAT | ||||||
|  |   --min-confidence FLOAT | ||||||
|  |   --max-confidence FLOAT | ||||||
|  |   --ned-priority INTEGER | ||||||
|  |   --help                    Show this message and exit. | ||||||
|  | 
 | ||||||
|  | ``` | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| ocrd >= 2.23.2 | ocrd >= 2.23.2 | ||||||
| pandas | pandas | ||||||
| matplotlib | matplotlib | ||||||
| qurator-sbb-tools | qurator-sbb-utils | ||||||
|  | @ -19,6 +19,7 @@ from .ocr import get_conf_color | ||||||
| from qurator.utils.ner import ner | from qurator.utils.ner import ner | ||||||
| from qurator.utils.ned import ned | from qurator.utils.ned import ned | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| @click.command() | @click.command() | ||||||
| @click.argument('tsv-file', type=click.Path(exists=True), required=True, nargs=1) | @click.argument('tsv-file', type=click.Path(exists=True), required=True, nargs=1) | ||||||
| @click.argument('url-file', type=click.Path(exists=False), required=True, nargs=1) | @click.argument('url-file', type=click.Path(exists=False), required=True, nargs=1) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue