| 
									
										
										
										
											2020-02-10 15:12:27 +01:00
										 |  |  | #!/bin/bash | 
					
						
							| 
									
										
										
										
											2019-10-31 15:22:12 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-30 17:54:05 +01:00
										 |  |  | DATA_SUBDIR=data | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 15:22:12 +01:00
										 |  |  | get_from_annex() { | 
					
						
							| 
									
										
										
										
											2020-01-16 15:44:26 +01:00
										 |  |  |   annex_get 'calamari-models/GT4HistOCR/2019-07-22T15:49+0200/*.ckpt*' | 
					
						
							| 
									
										
										
										
											2019-10-31 15:22:12 +01:00
										 |  |  |   annex_get 'tesseract-models/GT4HistOCR/*.traineddata' | 
					
						
							|  |  |  |   annex_get 'textline_detection/*.h5' | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | get_from_web() { | 
					
						
							| 
									
										
										
										
											2020-01-16 15:44:26 +01:00
										 |  |  |   download_to 'https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz' 'calamari-models/GT4HistOCR/2019-07-22T15:49+0200' | 
					
						
							| 
									
										
										
										
											2020-01-15 17:31:44 +01:00
										 |  |  |   download_to 'https://qurator-data.de/tesseract-models/GT4HistOCR/models.tar'   'tesseract-models/GT4HistOCR' | 
					
						
							| 
									
										
										
										
											2020-02-10 15:46:55 +01:00
										 |  |  |   download_to 'https://qurator-data.de/sbb_textline_detector/models.tar.gz'     'textline_detection' | 
					
						
							| 
									
										
										
										
											2019-10-31 15:22:12 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-21 11:54:01 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 11:32:37 +01:00
										 |  |  | check_data_subdir() { | 
					
						
							|  |  |  |   result=0 | 
					
						
							| 
									
										
										
										
											2019-10-30 17:54:05 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 11:32:37 +01:00
										 |  |  |   if git submodule status $DATA_SUBDIR | grep -q '^-'; then | 
					
						
							|  |  |  |     echo "$DATA_SUBDIR/ is not an initialized submodule"; result=1 | 
					
						
							|  |  |  |   fi | 
					
						
							|  |  |  |   if ! [ -e $DATA_SUBDIR/.git/annex ]; then | 
					
						
							|  |  |  |     echo "$DATA_SUBDIR/ is not a git annex repository"; result=1 | 
					
						
							|  |  |  |   fi | 
					
						
							|  |  |  |   if ! (cd $DATA_SUBDIR && git annex version | grep -q 'local repository version: 7'); then | 
					
						
							|  |  |  |     echo "$DATA_SUBDIR/ is not a git annex repository version 7"; result=1 | 
					
						
							|  |  |  |   fi | 
					
						
							|  |  |  |   if ! (cd $DATA_SUBDIR && git remote | grep -q '^nfs$'); then | 
					
						
							|  |  |  |     echo "$DATA_SUBDIR/ has no git remote 'nfs'"; result=1 | 
					
						
							|  |  |  |   fi | 
					
						
							| 
									
										
										
										
											2019-10-30 17:54:05 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 11:32:37 +01:00
										 |  |  |   return $result | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2019-10-30 17:54:05 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 15:22:12 +01:00
										 |  |  | suggest_commands() { | 
					
						
							|  |  |  |   echo "Suggested commands:" | 
					
						
							|  |  |  |   echo | 
					
						
							|  |  |  |   echo "git submodule update --init" | 
					
						
							|  |  |  |   echo "(cd $DATA_SUBDIR && git annex init --version=7)" | 
					
						
							|  |  |  |   echo "(cd $DATA_SUBDIR && git remote add nfs /<... path to ...>/GitNX-Repository/qurator/qurator-data)" | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | annex_get() { | 
					
						
							|  |  |  |   file_pattern="$1" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   ( | 
					
						
							|  |  |  |     cd data | 
					
						
							|  |  |  |     git annex get $file_pattern | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # fsck seems to be necessary to fix the files if we're in a submodule | 
					
						
							|  |  |  |     git annex fsck $file_pattern | 
					
						
							|  |  |  |   ) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | download_to() { | 
					
						
							|  |  |  |   download_source="$1" | 
					
						
							|  |  |  |   unpack_to="$2" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   ( | 
					
						
							|  |  |  |     cd data | 
					
						
							|  |  |  |     tmpf=`mktemp 'tmp.XXXXX'` | 
					
						
							|  |  |  |     wget -O $tmpf "$download_source" | 
					
						
							|  |  |  |     mkdir -p "$unpack_to" | 
					
						
							| 
									
										
										
										
											2020-02-07 19:42:54 +01:00
										 |  |  |     # Unpacking relies on tar -a unpacking any tar compression | 
					
						
							| 
									
										
										
										
											2019-10-31 15:22:12 +01:00
										 |  |  |     tar -C "$unpack_to" -af $tmpf -xv | 
					
						
							|  |  |  |     rm -f $tmpf | 
					
						
							|  |  |  |   ) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | set -e | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-02-10 15:00:56 +01:00
										 |  |  | if [ -n "$FORCE_GET_FROM_WEB" ]; then | 
					
						
							|  |  |  |   get_from_web | 
					
						
							|  |  |  | elif ! check_data_subdir; then | 
					
						
							| 
									
										
										
										
											2019-10-31 12:41:03 +01:00
										 |  |  |   select choice in "Abort to manually fix $DATA_SUBDIR submodule" "Download data files from the web"; do | 
					
						
							| 
									
										
										
										
											2019-10-31 11:32:37 +01:00
										 |  |  |     if [ $REPLY = 1 ]; then | 
					
						
							| 
									
										
										
										
											2019-10-31 15:22:12 +01:00
										 |  |  |       suggest_commands | 
					
						
							| 
									
										
										
										
											2019-10-31 11:32:37 +01:00
										 |  |  |       exit | 
					
						
							|  |  |  |     else | 
					
						
							| 
									
										
										
										
											2019-10-31 15:22:12 +01:00
										 |  |  |       get_from_web | 
					
						
							|  |  |  |       break | 
					
						
							| 
									
										
										
										
											2019-10-31 11:32:37 +01:00
										 |  |  |     fi | 
					
						
							| 
									
										
										
										
											2019-10-18 16:32:31 +02:00
										 |  |  |   done | 
					
						
							| 
									
										
										
										
											2019-10-31 11:32:37 +01:00
										 |  |  | else | 
					
						
							| 
									
										
										
										
											2019-10-31 15:22:12 +01:00
										 |  |  |   get_from_annex | 
					
						
							| 
									
										
										
										
											2019-10-31 11:32:37 +01:00
										 |  |  | fi | 
					
						
							| 
									
										
										
										
											2019-08-21 11:54:01 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-20 12:25:12 +02:00
										 |  |  | docker build -t my_ocrd_workflow . |