#!/bin/bash
# Fetch the model data needed for the image, then build the Docker image.
#
# The get_from_* functions below only declare WHAT to fetch. The helpers they
# call (annex_get, download_to) and the handle_data entry point are not
# defined in this file -- presumably they are provided by qurator_data_lib.sh,
# which is sourced further down (TODO confirm).
set -e

# Resolve this script's own location so the helper library is found
# regardless of the caller's working directory. "$0" is quoted so that a
# script path containing whitespace or glob characters is not word-split.
self=$(realpath "$0")
self_dir=$(dirname "$self")

# NOTE(review): not read anywhere in this file; presumably consumed by
# qurator_data_lib.sh -- confirm.
DATA_SUBDIR=data

# Request the data files via annex_get.
# The patterns are single-quoted, so they are passed to annex_get unexpanded.
get_from_annex() {
  annex_get 'calamari-models/GT4HistOCR/2019-07-22T15_49+0200/*.ckpt*'
  annex_get 'tesseract-models/GT4HistOCR/*.traineddata'
  annex_get 'textline_detection/*.h5'
  annex_get 'mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz'
}

# Request the data via download_to, called as: download_to [--no-unpack] URL TARGET.
# The tessdata_best archive is passed with --no-unpack and its target is the
# archive file path itself rather than a directory.
get_from_web() {
  download_to 'https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz' 'calamari-models/GT4HistOCR/2019-07-22T15_49+0200'
  download_to 'https://qurator-data.de/tesseract-models/GT4HistOCR/models.tar'  'tesseract-models/GT4HistOCR'
  download_to 'https://qurator-data.de/sbb_textline_detector/models.tar.gz'     'textline_detection'
  download_to --no-unpack 'https://qurator-data.de/mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz' 'mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz'
}

# Source the helper library that lives next to this script (path quoted so a
# directory name with whitespace stays a single word), then run its
# handle_data entry point.
. "$self_dir/qurator_data_lib.sh"
handle_data


# Build the image, tagged my_ocrd_workflow, naming an existing
# my_ocrd_workflow image as a cache source.
# NOTE(review): the build context is '.', i.e. the caller's working directory,
# not $self_dir -- confirm this script is always run from the repository root.
docker build --cache-from my_ocrd_workflow -t my_ocrd_workflow .