#!/bin/bash
# Fetch the model data this project needs, then build the ocrd-galley Docker
# images.
#
# Usage: <this script> [Dockerfile-NAME | NAME]...
#   With arguments: build only the named sub-images; a leading "Dockerfile-"
#   on an argument is stripped, so both spellings are accepted.
#   Without arguments: build every Dockerfile-* found in the current
#   directory, with the Dockerfile-core* ones first.
#
# The Dockerfiles and the build context (".") are resolved relative to the
# current working directory; only the helper library is resolved relative to
# this script's own location.
set -e

# Absolute path of this script and its directory. "$0" is quoted so that a
# checkout path containing whitespace or glob characters still resolves.
self=$(realpath "$0")
self_dir=$(dirname "$self")

# Not used directly in this script; presumably read by qurator_data_lib.sh
# as the target directory for the data -- TODO confirm against the library.
DATA_SUBDIR=data

# Request each required model file set through annex_get.
# annex_get is not defined in this file; presumably it is provided by
# qurator_data_lib.sh (sourced below) and this function is a callback for
# handle_data -- TODO confirm against the library.
get_from_annex() {
  annex_get 'calamari-models/GT4HistOCR/2019-07-22T15_49+0200/*.ckpt*'
  annex_get 'calamari-models/GT4HistOCR/2019-12-11T11_10+0100/*.ckpt*'
  annex_get 'calamari-models/GT4HistOCR/2019-12-18T17_24+0100*/*.ckpt*'
  annex_get 'mirror/github.com/Calamari-OCR/calamari_models/gt4histocr/*.ckpt*'
  annex_get 'tesseract-models/GT4HistOCR/*.traineddata'
  annex_get 'textline_detection/*.h5'
  annex_get 'mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz'
  annex_get 'sbb_binarization/*.h5'
}

# Download each required model archive through download_to.
# download_to is not defined in this file; presumably it is provided by
# qurator_data_lib.sh and takes optional flags, a source URL and a target
# path -- TODO confirm against the library.
# NOTE(review): get_from_annex also fetches the 2019-12-18T17_24+0100 calamari
# model and the Calamari-OCR/calamari_models mirror, but there is no matching
# download here -- confirm whether the web fallback is meant to be complete.
get_from_web() {
  download_to 'https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz' 'calamari-models/GT4HistOCR/2019-07-22T15_49+0200'
  download_to 'https://qurator-data.de/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/model.tar.xz' 'calamari-models/GT4HistOCR/2019-12-11T11_10+0100'
  download_to 'https://qurator-data.de/tesseract-models/GT4HistOCR/models.tar'  'tesseract-models/GT4HistOCR'
  download_to 'https://qurator-data.de/sbb_textline_detector/models.tar.gz'     'textline_detection'
  download_to --strip-components 1 'https://qurator-data.de/sbb_binarization/models.tar.gz' 'sbb_binarization'
  download_to --no-unpack 'https://qurator-data.de/mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz' 'mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz'
}

# Load the shared data helpers from next to this script and run the data
# step. The path is quoted so a script directory with spaces still works.
. "$self_dir/qurator_data_lib.sh"
handle_data

# Work out which sub-images to build, as a whitespace-separated list of names
# with the "Dockerfile-" prefix removed.
if [ -n "$1" ]; then
  # Emit one argument per line: sed's s command replaces only the first match
  # on each line, so joining all arguments on a single line would strip the
  # prefix from the first argument only.
  sub_images=$(printf '%s\n' "$@" | sed 's/^Dockerfile-//')
else
  # Core images go first (presumably because the other images are based on
  # them -- TODO confirm). They are listed a second time by the full listing
  # below; that repeat build is expected to be served from Docker's cache.
  sub_images=$(ls -1 Dockerfile-core* | sed 's/^Dockerfile-//')
  sub_images="$sub_images $(ls -1 Dockerfile-* | sed 's/^Dockerfile-//')"
fi

# $sub_images is deliberately left unquoted here: word splitting is what turns
# the list into individual image names.
for sub_image in $sub_images; do
  docker build \
    --cache-from="quratorspk/ocrd-galley-$sub_image" \
    -t "quratorspk/ocrd-galley-$sub_image" \
    -f "Dockerfile-$sub_image" \
    .
done