# Mirror of https://github.com/qurator-spk/ocrd-galley.git (synced 2025-10-30 02:34:13 +01:00)
			
		
		
		
	
		
			
				
	
	
		
			24 lines
		
	
	
	
		
			1 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			24 lines
		
	
	
	
		
			1 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable file
		
	
	
	
	
#!/bin/bash
# Abort as soon as any command exits with a non-zero status.
set -e

# Absolute path of this script and the directory that contains it, so that
# files living next to the script can be located independently of the
# caller's working directory. "$0" is quoted so paths containing spaces or
# glob characters survive intact.
self=$(realpath "$0")
self_dir=$(dirname "$self")

# Not referenced again in this script; presumably consumed by the helpers in
# the sourced qurator_data_lib.sh -- confirm there.
DATA_SUBDIR=data
#######################################
# Requests every data file this project needs through annex_get.
# annex_get is not defined in this script; it is presumably provided by
# qurator_data_lib.sh, which this script sources -- confirm there.
# The patterns are single-quoted on purpose: they are handed to annex_get
# literally instead of being expanded by the shell here.
# Arguments: none
#######################################
get_from_annex() {
  annex_get 'calamari-models/GT4HistOCR/2019-07-22T15_49+0200/*.ckpt*'
  annex_get 'tesseract-models/GT4HistOCR/*.traineddata'
  annex_get 'textline_detection/*.h5'
  annex_get 'mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz'
}
#######################################
# Requests every data file this project needs from qurator-data.de through
# download_to, passing a source URL followed by a destination path.
# download_to is not defined in this script; it is presumably provided by
# qurator_data_lib.sh, which this script sources -- confirm there.
# Arguments: none
#######################################
get_from_web() {
  download_to 'https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz' 'calamari-models/GT4HistOCR/2019-07-22T15_49+0200'
  download_to 'https://qurator-data.de/tesseract-models/GT4HistOCR/models.tar'  'tesseract-models/GT4HistOCR'
  download_to 'https://qurator-data.de/sbb_textline_detector/models.tar.gz'     'textline_detection'
  # NOTE(review): --no-unpack presumably keeps the archive as downloaded; the
  # destination here is a .tar.gz file path rather than a directory -- confirm
  # against download_to.
  download_to --no-unpack 'https://qurator-data.de/mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz' 'mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz'
}
# Load the helper library that sits next to this script. The path is quoted
# so a script directory containing whitespace does not split into several
# words.
# shellcheck source=qurator_data_lib.sh
. "$self_dir/qurator_data_lib.sh"

# handle_data is not defined in this script; it presumably comes from the
# library sourced above and dispatches to get_from_annex / get_from_web --
# confirm there.
handle_data

# Build the image and tag it my_ocrd_workflow, offering the previously built
# image of the same name as a layer cache source.
# NOTE(review): the build context is '.', i.e. the working directory at this
# point rather than $self_dir -- confirm that is where the Dockerfile lives.
docker build --cache-from my_ocrd_workflow -t my_ocrd_workflow .